Skip to content

Data classes and containers

utilz.data

Data containers

Box

Bases: list

Box is a flexible list-like container that allows for dot-notation access to attributes of its elements and methods. This makes it easy for example to perform a .groupby() operation on a list of dataframes.

Boxes can be transparent or opaque. Transparent boxes always return the result of an operation as a list. This is useful for example when you want to call a method on each box element and immediately work with the results.

Opaque boxes always return a new Box whose contents can be accessed using .contents() or slice notation box[:]. This allows for method chaining on the underlying data.

Examples:

>>> # Transparent box by default
>>> box = Box([df1, df2, df3])
>>> # Access content like a list
>>> box[0] # returns df1
>>> box[:] # returns [df1, df2, df3]
>>> box.contents() # equivalent
>>> # Access attributes or call methods just like you would on a single object
>>> box.head(10) # returns a list with the head of each df
>>> box.shape # returns a list with the shape of each df
>>> # Opaque box facilitates method chaining but need
>>> # `.contents()` to access results
>>> black_box = Box([df1, df2, df3], transparent=False)
>>> black_box.groupby('col').mean().contents()
>>> # Apply arbitrary functions to box elements
>>> result = box.map(lambda x: x + 1)
>>> # Can also modify in place without returning anything
>>> box.map(lambda x: x + 1, inplace=True)
>>> # Map respects box transparency for method chaining
>>> box.set_transparent(False)
>>> result = box.map(lambda x: x + 1).head().contents()
Source code in utilz/data.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
class Box(list):

    """
    Box is a flexible list-like container that allows for dot-notation access to attributes of its elements and methods. This makes it easy for example to perform a `.groupby()` operation on a list of dataframes.

    Boxes can be transparent or opaque. Transparent boxes always return the result of an operation as a list. This is useful for example when you want to call a method on each box element and immediately work with the results.

    Opaque boxes always return a new `Box` whose contents can be accessed using `.contents()` or slice notation `box[:]`. This allows for method chaining on the underlying data.

    Examples:
        >>> # Transparent box by default
        >>> box = Box([df1, df2, df3])

        >>> # Access content like a list
        >>> box[0] # returns df1
        >>> box[:] # returns [df1, df2, df3]
        >>> box.contents() # equivalent

        >>> # Access attributes or call methods just like you would on a single object
        >>> box.head(10) # returns a list with the head of each df
        >>> box.shape # returns a list with the shape of each df

        >>> # Opaque box facilitates method chaining but need
        >>> # `.contents()` to access results
        >>> black_box = Box([df1, df2, df3], transparent=False)
        >>> black_box.groupby('col').mean().contents()

        >>> # Apply arbitrary functions to box elements
        >>> result = box.map(lambda x: x + 1)

        >>> # Can also modify in place without returning anything
        >>> box.map(lambda x: x + 1, inplace=True)

        >>> # Map respects box transparency for method chaining
        >>> box.set_transparent(False)
        >>> result = box.map(lambda x: x + 1).head().contents()
    """

    def __init__(self, iterable=(), transparent=True):
        """
        Create a new box from an iterable

        Args:
            iterable (iterable): iterable of objects to store in the box; defaults to an empty box
            transparent (bool): whether methods should return results (`True`) or a new box (`False`); Default True
        """
        super().__init__(iterable)
        self._transparent_box = transparent

    def _wrap_output(self, results):
        """
        Apply the box's transparency setting to a set of results.

        Args:
            results (iterable): per-element results of an operation

        Returns:
            `results` unchanged when the box is transparent, otherwise a new opaque `Box` holding them
        """
        if self._transparent_box:
            return results
        return Box(results, transparent=self._transparent_box)

    def __getattr__(self, name, *args, **kwargs):
        """
        Delegate unknown attribute access to the elements of the box.

        Python only calls this when normal lookup on the box itself fails. The
        first element decides whether `name` is treated as a method (a function
        that calls it on every element is returned) or as a plain attribute (the
        per-element values are returned).

        Args:
            name (str): attribute or method name to look up on the elements

        Raises:
            AttributeError: if the box is empty or its first element has no such attribute
        """
        # An empty box has nothing to delegate to. This must be an
        # AttributeError (not the IndexError that `self[0]` would raise) so that
        # `hasattr`, `getattr(..., default)` and `copy` keep working.
        if not self:
            raise AttributeError(
                f"empty {type(self).__name__!r} object has no attribute {name!r}"
            )

        missing = object()
        probe = getattr(self[0], name, missing)
        if probe is missing:
            raise AttributeError(
                f"{type(self).__name__!r} object and its elements have no attribute {name!r}"
            )

        if callable(probe):

            def fn(*args, **kwargs):
                out = []
                for elem in self:
                    result = getattr(elem, name)
                    # An individual element may expose `name` as a plain value
                    # even though the first element exposes it as a callable.
                    result = result(*args, **kwargs) if callable(result) else result
                    out.append(result)
                return self._wrap_output(out)

            return fn

        return self._wrap_output([getattr(elem, name) for elem in self])

    def __repr__(self):
        """
        Summarize the box: its length, transparency and the type of its first element
        (`None` when the box is empty).
        """
        if self:
            first_cls = self[0].__class__
            elem_type = f"{first_cls.__module__}.{first_cls.__name__}"
        else:
            elem_type = "None"
        return f"Box(len={len(self)}, transparent={self._transparent_box}, type={elem_type})"

    def map(self, fn, **kwargs):
        """
        Apply a function to each element in the box.

        Args:
            fn (callable): function to apply to each element
            inplace (bool, optional): whether to modify the box in place or return the results. Defaults to False.
            **kwargs: any other keyword arguments are forwarded to the `map` function
                in scope for this module. NOTE(review): the builtin `map` accepts no
                keyword arguments; presumably the module imports a richer `map`
                helper (the original docs mention parallelization) -- confirm.

        Returns:
            `None` when `inplace=True`; otherwise the mapped results (transparent box)
            or a new opaque `Box` containing them (opaque box)
        """

        inplace = kwargs.pop("inplace", False)
        out = map(fn, self, **kwargs)
        if inplace:
            # Materialize before overwriting: `out` may be a lazy iterator that
            # still reads from this very list.
            self[:] = list(out)
            return None
        return self._wrap_output(out)

    def contents(self):
        """
        Convert box to list

        Returns:
            list: list of elements
        """
        return list(self)

    def set_transparent(self, transparent):
        """
        Set the transparency of the box

        Args:
            transparent (bool): whether the box should be transparent or not
        """
        self._transparent_box = transparent
__init__(iterable, transparent=True)

Create a new box from an iterable

Parameters:

Name Type Description Default
iterable iterable

iterable of objects to store in the box

required
transparent bool

whether methods should return results (True) or a new box (False); Default True

True
Source code in utilz/data.py
46
47
48
49
50
51
52
53
54
55
def __init__(self, iterable, transparent=True):
    """
    Build a box holding the items produced by `iterable`.

    Args:
        iterable (iterable): objects to store in the box
        transparent (bool): whether methods should return results (`True`) or a new box (`False`). Defaults to `True`.
    """
    # Record the transparency flag, then let the parent initializer consume the items.
    self._transparent_box = transparent
    super().__init__(iterable)
contents()

Convert box to list

Returns:

Name Type Description
list

list of elements

Source code in utilz/data.py
118
119
120
121
122
123
124
125
def contents(self):
    """
    Return the elements of the box as a new list.

    Returns:
        list: a fresh list containing every element, in order
    """
    return [*self]
map(fn, **kwargs)

Apply a function to each element in the box. Accepts all kwargs that map does, including parallelization!

Parameters:

Name Type Description Default
fn callable

function to apply to each element

required
inplace bool

whether to modify the box in place or return a new box. Defaults to False.

False
*args

positional arguments to pass to fn

required
**kwargs

keyword arguments to pass to fn

{}

Returns:

Name Type Description
Box

new box with the results of applying fn to each element

Source code in utilz/data.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def map(self, fn, **kwargs):
    """
    Apply `fn` to every element of the box.

    Args:
        fn (callable): function to apply to each element
        inplace (bool, optional): when true, replace the box's contents with the results and return nothing. Defaults to False.
        **kwargs: remaining keyword arguments are forwarded to the underlying `map` call

    Returns:
        `None` when `inplace` is true; otherwise the mapped results for a
        transparent box, or a new `Box` holding them for an opaque box
    """
    inplace = kwargs.pop("inplace", False)
    mapped = map(fn, self, **kwargs)

    if inplace:
        # Re-initialize this box with the mapped values, keeping its transparency.
        self.__init__(mapped, transparent=self._transparent_box)
        return None

    if self._transparent_box:
        return mapped
    return Box(mapped, transparent=self._transparent_box)
set_transparent(transparent)

Set the transparency of the box

Parameters:

Name Type Description Default
transparent bool

whether the box should be transparent or not

required
Source code in utilz/data.py
127
128
129
130
131
132
133
134
def set_transparent(self, transparent):
    """
    Switch the box between transparent and opaque mode.

    Args:
        transparent (bool): `True` to make the box transparent, `False` to make it opaque
    """
    # The flag is stored on the instance; nothing is returned.
    self._transparent_box = transparent
    return None