# Source code for pandas_paddles.paddles

"""Helpers for pandas-paddles to simplify compositions.

Use as::

    from pandas_paddles import paddles
"""
from functools import reduce
import operator
from typing import Any, Callable, Dict, Iterable, Literal, Union

from .pandas import PandasDataframeContext
try:
    from .dask import DF
except ImportError:
    from .pandas import DF

# Names exported by ``from pandas_paddles.paddles import *``.
# NOTE(review): ``ensure_DF_expr`` is defined in this module but is not listed
# here -- confirm that leaving it out of the star-import API is intentional.
__all__ = [
    "build_filter",
    "combine",
    "str_join",
]

# Type aliases used in the signatures of this module.

# A column is referenced either by its name or by a ``DF``-expression.
ColSpec = Union[str, PandasDataframeContext]

# A binary operator: any two-argument callable, or one of the operator
# names that ``combine`` translates to ``operator.and_`` / ``operator.or_``.
BinaryOp = Union[
    Callable[[Any, Any], Any],
    Literal["and", "&", "or", "|"],
]


def ensure_DF_expr(col: ColSpec) -> PandasDataframeContext:
    """Convert column names to ``DF``-expressions when necessary.

    Parameters
    ----------
    col
        The column name (``str``) or a ``DF``-expression.

    Returns
    -------
    PandasDataframeContext
        ``DF[col]`` if ``col`` is a ``str``; otherwise ``col`` itself,
        returned unchanged (the very same object).

    Examples
    --------
    Strings are converted to ``DF``-expressions::

        >>> ensure_DF_expr("col-name")
        DF["col-name"]

    ``DF``-expressions are passed through::

        >>> expr2 = DF["another-column"]
        >>> expr2 is ensure_DF_expr(expr2)
        True
    """
    # Only plain strings are treated as column names; everything else is
    # assumed to already be an expression and is handed back untouched.
    if isinstance(col, str):
        return DF[col]
    return col


def str_join(sep: str, col1: ColSpec, *cols: ColSpec) -> PandasDataframeContext:
    """Create expression to join multiple columns in a string.

    This is similar to ``str.join``.

    Parameters
    ----------
    sep
        The separator placed between consecutive columns.
    col1, cols
        The columns to be joined. These can be either ``str`` or
        ``DF``-expressions. If a ``str`` is passed, it's taken as column name
        and the respective ``DF``-expression is created. In both cases the
        expression is first cast to ``str`` using
        :func:`pandas.Series.astype()`.

    Returns
    -------
    PandasDataframeContext
        The ``DF``-expression, a callable taking a
        :class:`~pandas.DataFrame` as argument.

    Examples
    --------
    Reference columns with their names::

        >>> df = pd.DataFrame({"a": list("abc"), "b": list("XYZ"), "c": range(3)})
        >>> df.assign(a_plus_b=str_join("+", "a", "b"))
           a  b  c a_plus_b
        0  a  X  0      a+X
        1  b  Y  1      b+Y
        2  c  Z  2      c+Z

    Reference columns with ``DF``-expressions::

        >>> df.assign(a_plus_b=str_join("+", "a", DF["b"].str.lower()))
           a  b  c a_plus_b
        0  a  X  0      a+x
        1  b  Y  1      b+y
        2  c  Z  2      c+z

    Non-string columns are converted::

        >>> df.assign(a_plus_c=str_join("+", "a", "c"))
           a  b  c a_plus_c
        0  a  X  0      a+0
        1  b  Y  1      b+1
        2  c  Z  2      c+2
    """
    # Start from the first column, then append "<sep><column>" for each
    # remaining one; every column is cast to ``str`` before concatenation.
    expr = ensure_DF_expr(col1).astype(str)
    for col in cols:
        expr = expr + sep + ensure_DF_expr(col).astype(str)
    return expr


def combine(
    bool_expressions: Iterable[PandasDataframeContext],
    op: BinaryOp = operator.and_,
) -> PandasDataframeContext:
    """Combine multiple DF-expressions to use in df.loc[].

    The ``DF``-expressions must evaluate to a boolean array, e.g.,::

        DF["col"] > 1
        DF["col"].str.startswith("prefix")
        DF["col_1"] < DF["col_2"]

    Parameters
    ----------
    bool_expressions
        Iterable of ``DF``-expressions that will be combined.
    op
        The operator to combine the filters. :func:`operator.and_` and
        :func:`operator.or_` will be most useful. ``"and"``, ``"&"`` and
        ``"or"``, ``"|"`` are also accepted and the respective operator is
        used.

    Returns
    -------
    PandasDataframeContext
        The combined expression, i.e. the left fold of ``op`` over
        ``bool_expressions``. A single-element iterable yields that element
        unchanged.

    Raises
    ------
    ValueError
        If ``op`` is a string other than ``"and"``, ``"&"``, ``"or"`` or
        ``"|"``.
    TypeError
        If ``bool_expressions`` is empty (raised by :func:`functools.reduce`,
        which is called without an initial value).
    """
    # Translate the accepted operator names to the actual callables.
    if isinstance(op, str):
        if op in ("and", "&"):
            op = operator.and_
        elif op in ("or", "|"):
            op = operator.or_
        else:
            raise ValueError(f"Unsupported operator name: {op!r}")
    return reduce(op, bool_expressions)


def build_filter(
    predicates: Dict[ColSpec, Any],
    op: BinaryOp = operator.and_,
) -> PandasDataframeContext:
    """Build a filter expression from column-value pairs.

    ::

        df.loc[build_filter({"a": "A", "b": "B"})]

    is equivalent to::

        df.loc[
            (DF["a"] == "A")
            & (DF["b"] == "B")
        ]

    Parameters
    ----------
    predicates
        The column-value pairs to filter on. Columns can be either specified
        as ``str`` or as ``DF``-expressions. Values can be literal values or
        ``DF``-expressions.
    op
        The operator to combine the predicates. :func:`operator.and_` and
        :func:`operator.or_` will be most useful. ``"and"``, ``"&"`` and
        ``"or"``, ``"|"`` are also accepted and the respective operator is
        used.

    Returns
    -------
    PandasDataframeContext
        The ``DF``-expression combining the predicates.

    Raises
    ------
    ValueError
        If ``op`` is a string that :func:`combine` does not recognise.
    TypeError
        If ``predicates`` is empty, because :func:`combine` then reduces an
        empty sequence.
    """
    # One equality test per (column, value) pair, built lazily and folded
    # together by ``combine``.
    expressions = (ensure_DF_expr(col) == val for col, val in predicates.items())
    return combine(expressions, op=op)