Skip to content

Decorators

utilz.decorators

Custom guards for defensive data analysis compatible with bulwark.

Intended usage is as Python decorators:

from utilz.decorators import log

@log
def myfunc(df):
    do some stuff...

expensive(force=False)

A decorator that wraps joblib.Memory for caching the results of a function to disk. This is useful for expensive functions that take a while to compute, as rerunning them will simply load the last results from disk.

Parameters:

Name Type Description Default
force bool

clear the cache before rerunning; Default False

False
Source code in utilz/decorators.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def expensive(
    force: bool = False,
) -> Any:
    """
    Build a decorator that caches a function's results on disk with `joblib.Memory`.

    Every call of the decorated function opens the store at `./cachedir`, wraps
    the function with `Memory.cache` and invokes the cached version, so rerunning
    an expensive computation can load the previous result from disk instead of
    recomputing it.

    Args:
        force (bool, optional): when True, `Memory.clear()` is called on the
            store before each call of the decorated function; Default False

    Returns:
        A decorator to apply to the function whose results should be cached.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # The store path is relative, so it resolves against the current
            # working directory at call time.
            store = Memory("./cachedir")
            if force:
                store.clear()
            return store.cache(func)(*args, **kwargs)

        return wrapper

    return decorator

log(func)

Log the shape (DataFrame/array) or length (list/dict) of a function's first argument before the call, and of its return value after the call

Source code in utilz/decorators.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def log(func):
    """
    Log the shape/length of a function's first positional input and of its output

    One line is printed before the call for the first positional argument and
    one line after the call for the return value, whenever that value is a
    `pd.DataFrame`, `np.ndarray`, `list` or `dict`. Values of any other type
    are not logged, and a call made without positional arguments simply skips
    the "before" line. The return value is passed through unchanged.

    Args:
        func (callable): function to wrap

    Returns:
        callable: wrapper that accepts the same arguments as `func`
    """

    def describe(value):
        # (size, tag) for the supported containers, None for everything else
        if isinstance(value, pd.DataFrame):
            return value.shape, "df"
        if isinstance(value, np.ndarray):
            return value.shape, "np"
        if isinstance(value, list):
            return len(value), "[]"
        if isinstance(value, dict):
            return len(value), "{}"
        return None

    def report(stage, value):
        summary = describe(value)
        if summary is not None:
            size, tag = summary
            print(f"{stage} {func.__name__}, {size}, {tag}")

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Only the first positional argument is inspected; guarding on `args`
        # lets functions called with keywords only (or nothing) be decorated.
        if args:
            report("before", args[0])
        result = func(*args, **kwargs)
        report("after", result)
        return result

    return wrapper

maybe(function)

A decorator that wraps a function which should take a kwarg called out_file. If out_file exists then it's loaded from disk, otherwise the wrapped function is called. If overwrite=True is passed as a kwarg, the wrapped function always runs, even when out_file exists. You can also pass loader_func=callable to use a custom loading function.

@maybe
def expensive_func(data, **kwargs):
    b = something_expensive(data)
    np.save(kwargs.get('out_file'), b)
    return b

# First run will execute the function
expensive_func(data, out_file='result.npy')

# Second run will load from 'result.npy'
expensive_func(data, out_file='result.npy')
Source code in utilz/decorators.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def maybe(function):
    """
    Skip a computation when its result file is already on disk.

    The decorated function must be called with a kwarg `out_file`. If that path
    exists, it is read with `load` and the wrapped function is not called;
    otherwise the wrapped function runs. Passing `overwrite = True` makes the
    wrapped function run even when `out_file` exists. When loading, every kwarg
    other than `out_file` and `overwrite` is forwarded to `load`, which is how
    `loader_func = callable` can be supplied for a custom loading function.
    When the wrapped function does run, it receives all kwargs unchanged
    (including `out_file` and `overwrite`).

        @maybe
        def expensive_func(data, **kwargs):
            b = something_expensive(data)
            np.save(kwargs.get('out_file'), b)
            return b

        # First run will execute the function
        expensive_func(data, out_file='result.npy')

        # Second run will load from 'result.npy'
        expensive_func(data, out_file='result.npy')

    Raises:
        ValueError: if `out_file` is missing or None in the call's kwargs

    """

    @wraps(function)
    def wrapper(*args, **kwargs):
        # out_file is taken from the kwargs of the call, not from the signature
        target = kwargs.get("out_file")
        if target is None:
            raise ValueError(
                "out_file must be provided as a kwarg to the decorated function!"
            )
        target = Path(target)

        # optional flag that forces a rerun even if the file is present
        force_rerun = kwargs.get("overwrite", False)

        if not target.exists() or force_rerun:
            return function(*args, **kwargs)

        print(f"Loading precomputed result from: {target}")
        # the decorator's own control kwargs are not meaningful to the loader
        passthrough = {
            key: value
            for key, value in kwargs.items()
            if key not in ("out_file", "overwrite")
        }
        return load(target, **passthrough)

    return wrapper

show(func)

Print result of function call in addition to returning it

Source code in utilz/decorators.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def show(func):
    """
    Print result of function call in addition to returning it

    What is displayed and what is returned depends on the wrapped function's
    result:

    - `None`: the first positional argument is displayed and returned instead;
      if the call had no positional arguments, `None` is displayed and returned
    - `pd.DataFrame`: `result.head()` is displayed, the full frame is returned
    - anything else: the result is displayed and returned as is

    Display uses `IPython.display.display` when `_is_notebook()` is true and
    the built-in `print` otherwise.

    Args:
        func (callable): function to wrap

    Returns:
        callable: wrapper that accepts the same arguments as `func`
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        if result is None:
            # Fall back to the input. A keyword-only call has no positional
            # input to fall back to, so guard instead of raising IndexError
            # after the wrapped function has already run.
            to_return = args[0] if args else None
            to_show = to_return
        elif isinstance(result, pd.DataFrame):
            # print head, return result
            to_show = result.head()
            to_return = result
        else:
            # print and return result
            to_show = to_return = result

        if _is_notebook():
            # imported lazily so IPython is only needed inside notebooks
            from IPython.display import display

            print_func = display
        else:
            print_func = print

        print_func(to_show)
        return to_return

    return wrapper

timeit(func)

Log the run time of a function

Source code in utilz/decorators.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def timeit(func):
    """
    Print how long each call of a function took

    The elapsed time is the difference between two `dt.datetime.now()` readings
    taken immediately before and after the call; it is printed together with
    the function's name, and the function's return value is passed through.

    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        started = dt.datetime.now()
        outcome = func(*args, **kwargs)
        elapsed = dt.datetime.now() - started
        # the timedelta is rendered with str(), i.e. in H:MM:SS[.ffffff] form
        print(f"{func.__name__}, took {elapsed!s}s")
        return outcome

    return wrapper