API Reference
Welcome to the API Reference for polars-expr-hopper!
Since polars-expr-hopper is a single-module project, all functionality is contained in
`polars_hopper/__init__.py`. In this reference, you’ll find details on the library’s main
plugin class (`HopperPlugin`), which attaches a “hopper” of Polars expressions (`pl.Expr`)
to each `DataFrame` under the `.hopper` namespace. These expressions can be automatically
applied as soon as their required columns appear.
You’ll see method signatures, class methods, and parameters that let you:
- Add expressions (e.g., `df.hopper.add_filters(pl.col("age") > 18)`)
- Check which columns are missing and skip or apply expressions dynamically
- Optionally serialize expressions to JSON/binary for Parquet round-trip (if needed)
If you’re new to the project, you might start with the main Get Started guide
or the README to see basic usage and installation steps. Then come back here for a thorough
look at the plugin’s APIs.
Polars hopper plugin with filter, select and add-column 'queues'.
Register a ".hopper" namespace on Polars DataFrame objects for managing
a 'hopper' of Polars expressions (e.g. filters, selects). The expressions are
stored as metadata in `df.config_meta`. They apply themselves when the
necessary columns exist, removing themselves once used.
HopperPlugin
HopperPlugin(df: DataFrame)
Hopper plugin for storing and applying Polars filter/select expressions.
By calling `df.hopper.add_filters(*exprs)`, you add Polars expressions
that should evaluate to a boolean mask (for filtering).
By calling `df.hopper.add_selects(*exprs)`, you add Polars expressions
that transform or select columns when calling `df.select(expr)`.
Source code in src/polars_hopper/__init__.py
def __init__(self, df: pl.DataFrame):
    """Bind the plugin to ``df`` and seed any missing hopper metadata lists.

    The per-kind queues ``hopper_filters``, ``hopper_selects`` and
    ``hopper_addcols`` are created as empty lists only when they are absent,
    so queues that already exist in the metadata are left untouched.
    """
    self._df = df
    metadata = df.config_meta.get_metadata()
    for queue_key in ("hopper_filters", "hopper_selects", "hopper_addcols"):
        # setdefault only inserts when the key is missing.
        metadata.setdefault(queue_key, [])
    df.config_meta.update(metadata)
add_exprs
add_exprs(*exprs: Expr, kind: Literal['f', 's', 'a']) -> None
Add one or more Polars expressions to the hopper.
We maintain a monotonically increasing `hopper_max_idx` and also serialise each
expression to JSON (using `expr.meta.serialize(format="json")`) for
JSON-compatibility in expr registry metadata (stored in the
`hopper_expr_register` key).
Parameters
kind : {'f', 's', 'a'}
Specifies which list in metadata we update:
- 'f' => hopper_filters
- 's' => hopper_selects
- 'a' => hopper_addcols
exprs : pl.Expr
The actual Polars expressions to add.
Source code in src/polars_hopper/__init__.py
def add_exprs(self, *exprs: pl.Expr, kind: Literal["f", "s", "a"]) -> None:
    """Add one or more Polars expressions to the hopper.

    Each expression is appended to the metadata list selected by ``kind`` and
    is also recorded as one row of the expression registry. A registry row
    holds a running index, the kind, the expression serialised to JSON
    (using ``expr.meta.serialize(format="json")``), an ``applied`` flag set
    to False, and the expression's root column names.

    Parameters
    ----------
    *exprs : pl.Expr
        The actual Polars expressions to add. When none are given the method
        returns immediately without touching the metadata.
    kind : {'f', 's', 'a'}
        Specifies which list in metadata we update:
        - 'f' => hopper_filters
        - 's' => hopper_selects
        - 'a' => hopper_addcols
        Any other value raises ``KeyError`` from the lookup table below.
    """
    if not exprs:
        return

    meta = self._df.config_meta.get_metadata()

    # Map the one-letter kind onto the metadata key of its pending list
    hopper_kind_meta_key = {
        "f": "hopper_filters",
        "s": "hopper_selects",
        "a": "hopper_addcols",
    }[kind]

    # Append expressions to the chosen list (extends the stored list in place
    # when the key already exists)
    kind_exprs = meta.get(hopper_kind_meta_key, [])
    kind_exprs.extend(exprs)
    meta[hopper_kind_meta_key] = kind_exprs

    # Last index handed out so far; -1 when no index has been stored yet.
    # NOTE(review): `hopper_idx_key` is not defined in this method --
    # presumably a module-level constant; confirm.
    pre_idx = meta.get(hopper_idx_key, -1)
    pre_reg = self._read_expr_registry()

    # Index of the last expression added by this call
    post_idx = pre_idx + len(exprs)
    # One registry row per new expression, numbered pre_idx + 1 ... post_idx
    registrands = [
        {
            "idx": expr_offset + pre_idx + 1,
            "kind": kind,
            "expr": expr.meta.serialize(format="json"),
            "applied": False,
            "root_names": expr.meta.root_names(),
        }
        for expr_offset, expr in enumerate(exprs)
    ]
    # NOTE(review): `reg_schema` also comes from outside this method; confirm
    # it matches the row keys above.
    registry = pl.concat(
        [pre_reg, pl.DataFrame(registrands, schema=reg_schema)],
    )
    self._write_expr_registry(registry)
    meta[hopper_idx_key] = post_idx

    # Write updated metadata back
    self._df.config_meta.update(meta)
pop_expr_from_registry
pop_expr_from_registry(expr: Expr) -> bool
Remove earliest row from 'hopper_expr_register' that matches given pl.Expr.
Do so by comparing JSON-serialised expressions.
Returns
True if a matching row was found and removed; False if no match was found.
Source code in src/polars_hopper/__init__.py
def pop_expr_from_registry(self, expr: pl.Expr) -> bool:
    """Remove earliest row from 'hopper_expr_register' that matches given pl.Expr.

    Rows are matched by comparing the JSON-serialised form of ``expr`` with
    the registry's ``expr`` column; the ``kind`` and ``applied`` columns are
    not considered. Among the matching rows, the one with the lowest ``idx``
    is removed and the registry is written back to the metadata.

    Parameters
    ----------
    expr : pl.Expr
        The expression whose registry row should be removed.

    Returns
    -------
    True if a matching row was found and removed; False if there is no
    registry in the metadata or no row matches.
    """
    meta = self._df.config_meta.get_metadata()
    hopper_reg_key = "hopper_expr_register"
    if hopper_reg_key not in meta:
        return False  # No registry at all => nothing to remove

    # 1) Convert the stored JSON string back into a DataFrame
    reg_json = meta[hopper_reg_key].encode()
    registry_df = pl.read_json(reg_json, schema=reg_schema)

    # 2) Serialize the incoming expr so it can be compared as a string
    serialized_expr = expr.meta.serialize(format="json")

    # 3) Collect every row whose 'expr' matches. No row limit is applied here,
    #    so the minimum below really is the lowest idx among all matches and
    #    does not depend on the registry's row order.
    matching = registry_df.filter(pl.col("expr") == serialized_expr)
    if matching.is_empty():
        return False  # No match found => do nothing

    # 4) Identify the earliest match by lowest idx
    earliest_idx = matching["idx"].min()

    # 5) Remove that row from the registry
    updated_df = registry_df.filter(pl.col("idx") != earliest_idx)

    # 6) Write the updated DF back as a JSON string
    meta[hopper_reg_key] = updated_df.write_json()
    self._df.config_meta.set(**meta)
    return True
apply_ready_exprs
apply_ready_exprs(*kinds: Literal['f', 's', 'a']) -> pl.DataFrame
Apply any expressions of all kinds ('f', 's' and 'a'), if the needed columns exist.
This delegates to `apply_ready_exprs_kinds("f", "s", "a")`; the `*kinds` argument is
accepted but not used.
Each expression is tried in turn:
- kind == 'f' => df.filter(expr)
- kind == 's' => df.select(expr)
- kind == 'a' => df.with_columns(expr)
If needed columns are missing, that expression remains pending. If we successfully
apply an expression, `apply_ready_exprs_kinds` calls `pop_expr_from_registry(expr)`.
Note: earlier wording stated that selects/addcols are not removed from the registry;
the source shown for `apply_ready_exprs_kinds` pops every applied expression,
whatever its kind.
Returns
A new (possibly transformed) DataFrame. If it differs from self._df,
polars-config-meta merges metadata automatically.
Source code in src/polars_hopper/__init__.py
def apply_ready_exprs(self, *kinds: Literal["f", "s", "a"]) -> pl.DataFrame:
    """Apply every ready expression of all three kinds.

    This is a convenience wrapper around ``apply_ready_exprs_kinds`` that
    always requests filters ('f'), selects ('s') and addcols ('a').

    Parameters
    ----------
    *kinds : {'f', 's', 'a'}
        Accepted for signature compatibility but not read: all three kinds
        are always passed to the delegate.

    Returns
    -------
    The DataFrame returned by ``apply_ready_exprs_kinds("f", "s", "a")``.

    Notes
    -----
    Registry handling is whatever ``apply_ready_exprs_kinds`` does; this
    wrapper adds no per-kind special-casing of its own.
    """
    every_kind = ("f", "s", "a")
    return self.apply_ready_exprs_kinds(*every_kind)
apply_ready_exprs_kinds
apply_ready_exprs_kinds(*kinds: Literal['f', 's', 'a']) -> pl.DataFrame
Apply any expressions of the specified kind(s), if the needed columns exist.
Each expression is tried in turn
- kind == 'f' => df.filter(expr)
- kind == 's' => df.select(expr)
- kind == 'a' => df.with_columns(expr)
If needed columns are missing, that expression remains pending. If we successfully
apply an expression, we call pop_expr_from_registry(expr).
Returns
A new (possibly transformed) DataFrame. If it differs from self._df,
polars-config-meta merges metadata automatically.
Source code in src/polars_hopper/__init__.py
def apply_ready_exprs_kinds(self, *kinds: Literal["f", "s", "a"]) -> pl.DataFrame:
    """Apply any expressions of the specified kind(s), if the needed columns exist.

    Registry rows of the requested kinds are processed in ascending ``idx``
    order. Each expression is tried in turn:
    - kind == 'f' => df.filter(expr)
    - kind == 's' => df.select(expr)
    - kind == 'a' => df.with_columns(expr)
    (the dispatch itself lives in ``self._apply_expression``).

    If needed columns are missing, that expression remains pending. If the
    columns are present, the expression is popped from the registry with
    ``pop_expr_from_registry(expr)`` and then applied. The scan is repeated
    until a pass applies nothing or no candidates remain.

    Parameters
    ----------
    *kinds : {'f', 's', 'a'}
        The kinds of expression to consider. At least one is required.

    Returns
    -------
    A new (possibly transformed) DataFrame. When it is a different object
    from ``self._df``, the still-pending lists and the registry are copied
    onto its metadata before it is returned.

    Raises
    ------
    ValueError
        If no kinds are given, or if a ready expression could not be popped
        from the registry.
    AssertionError
        If the registry and the per-kind metadata lists disagree (only when
        assertions are enabled).
    """
    if not kinds:
        raise ValueError(
            "No expression kinds specified. Provide at least one of 'f','s','a'.",
        )

    # Candidates are processed in registry 'idx' order (see the sort below),
    # not in the order the kinds were passed.
    new_df = self._df
    while True:
        registry = self._read_expr_registry()
        candidates = registry.filter(pl.col("kind").is_in(kinds)).sort("idx")
        if candidates.is_empty():
            break

        # Snapshot of the source DataFrame's metadata; its per-kind lists are
        # rewritten with whatever is still pending at the end of this pass.
        meta_pre = self._df.config_meta.get_metadata()
        still_pending = {k: [] for k in kinds}
        changed_any = False

        for row in candidates.iter_rows(named=True):
            expr_str = row["expr"]
            row_kind = row["kind"]
            # NOTE(review): `meta_key_lookup` is defined outside this method;
            # presumably maps 'f'/'s'/'a' to the hopper_* metadata keys.
            meta_key = meta_key_lookup[row_kind]
            current_exprs = meta_pre.get(meta_key, [])
            assert current_exprs, f"Registry is inconsistent with {meta_key}"

            # Recover the live pl.Expr object whose JSON form equals the
            # registry row's string.
            expr = next(
                (
                    ck
                    for ck in current_exprs
                    if ck.meta.serialize(format="json") == expr_str
                ),
                None,
            )
            assert expr is not None, f"Registry is inconsistent with {meta_key}"

            needed_cols = set(expr.meta.root_names())
            # Columns are re-read from new_df each time because earlier
            # expressions in this pass may have changed them
            avail_cols = set(new_df.collect_schema())
            if needed_cols <= avail_cols:
                # Registry is read before and after the pop to check that
                # exactly one row was removed.
                r0 = self._read_expr_registry()
                removed = self.pop_expr_from_registry(expr)
                # NOTE(review): `debug` is not defined in this method;
                # presumably a module-level flag.
                if debug:
                    print(f"Popped {expr}")
                assert removed, f"Expr {expr} was not popped from registry"
                r1 = self._read_expr_registry()
                assert (n_popped := len(r0) - len(r1)) == 1, (
                    f"Registry popped {n_popped} items"
                )
                # Assertions are skipped under `python -O`; this explicit
                # check is the guard that remains in that mode.
                if not removed:
                    raise ValueError(f"Inconsistent registry: {expr} not found")

                # Actually apply the expression
                new_df = self._apply_expression(new_df, row_kind, expr)
                changed_any = True
                # Update available columns in case columns changed
                avail_cols = set(new_df.collect_schema())
            else:
                # Missing columns => keep it pending
                if debug:
                    print(f"Appending {expr} to still_pending {row_kind}")
                still_pending[row_kind].append(expr)

        # Update old DF's metadata list (filters/selects/addcols)
        pending_updates = {meta_key_lookup[k]: p for k, p in still_pending.items()}
        meta_pre.update(pending_updates)
        self._df.config_meta.update(meta_pre)

        # A pass that applied nothing cannot unlock further expressions
        if not changed_any:
            break

    # If new_df is indeed a new object, also update that DF's metadata
    if id(new_df) != id(self._df):
        self._refresh_expr_registry()
        meta_post = new_df.config_meta.get_metadata()
        pending_updates = {
            meta_key_lookup[k]: p for k, p in still_pending.items()
        }
        meta_post.update(pending_updates)
        # Carry the source DataFrame's current registry over to the result.
        # NOTE(review): `hopper_reg_key` is defined outside this method.
        fresh_registry = self._df.config_meta.get_metadata()[hopper_reg_key]
        meta_post[hopper_reg_key] = fresh_registry
        new_df.config_meta.update(meta_post)

    return new_df
add_filters
add_filters(*exprs: Expr) -> None
Add one or more Polars filter expressions to the hopper.
Each expression is typically used in `df.filter(expr)`, returning
a boolean mask. They remain in the queue until the columns they need
are present, at which point they are applied (and removed).
Source code in src/polars_hopper/__init__.py
def add_filters(self, *exprs: pl.Expr) -> None:
    """Queue one or more filter expressions in the hopper.

    Thin wrapper that forwards ``exprs`` to ``add_exprs`` with ``kind="f"``.
    Filter expressions are the ones intended for ``df.filter(expr)``, i.e.
    they should produce a boolean mask.
    """
    filter_kind = "f"
    self.add_exprs(*exprs, kind=filter_kind)
list_filters
list_filters() -> list[pl.Expr]
Return the list of pending Polars filter expressions.
Source code in src/polars_hopper/__init__.py
def list_filters(self) -> list[pl.Expr]:
    """Return the ``hopper_filters`` list from the DataFrame's metadata.

    An empty list is returned when the key is absent.
    """
    metadata = self._df.config_meta.get_metadata()
    return metadata.get("hopper_filters", [])
apply_ready_filters
apply_ready_filters() -> pl.DataFrame
Apply any stored filter expressions if referenced columns exist.
Each expression is tried in turn with df.filter(expr)
. If missing
columns, that expression remains pending for later.
Returns
A new (possibly filtered) DataFrame. If it differs from self._df,
polars-config-meta merges metadata automatically.
Source code in src/polars_hopper/__init__.py
def apply_ready_filters(self) -> pl.DataFrame:
    """Apply the stored filter expressions whose columns are available.

    Delegates to ``apply_ready_exprs_kinds("f")``, so only expressions of
    kind 'f' are considered; filters whose columns are missing stay pending.

    Returns
    -------
    The DataFrame returned by ``apply_ready_exprs_kinds("f")``.
    """
    filtered = self.apply_ready_exprs_kinds("f")
    return filtered
add_selects
add_selects(*exprs: Expr) -> None
Add one or more Polars select expressions to the hopper.
These expressions are used in `df.select(expr)`. Each expression
typically yields a column transformation, or just a column reference
(like `pl.col("foo").alias("bar")`).
Source code in src/polars_hopper/__init__.py
def add_selects(self, *exprs: pl.Expr) -> None:
    """Queue one or more select expressions in the hopper.

    Thin wrapper that forwards ``exprs`` to ``add_exprs`` with ``kind="s"``.
    Select expressions are the ones intended for ``df.select(expr)``, e.g. a
    column transformation or a reference such as
    ``pl.col("foo").alias("bar")``.
    """
    select_kind = "s"
    self.add_exprs(*exprs, kind=select_kind)
list_selects
list_selects() -> list[pl.Expr]
Return the list of pending Polars select expressions.
Source code in src/polars_hopper/__init__.py
def list_selects(self) -> list[pl.Expr]:
    """Return the ``hopper_selects`` list from the DataFrame's metadata.

    An empty list is returned when the key is absent.
    """
    metadata = self._df.config_meta.get_metadata()
    return metadata.get("hopper_selects", [])
apply_ready_selects
apply_ready_selects() -> pl.DataFrame
Apply any stored select expressions if columns exist.
We attempt each select expression in turn. Because df.select(expr)
replaces the DataFrame columns entirely, you should be aware that
subsequent select expressions apply to the new shape of the DataFrame.
If any required columns are missing, that expression remains pending.
Returns
A new DataFrame with the successfully selected/transformed columns.
Source code in src/polars_hopper/__init__.py
def apply_ready_selects(self) -> pl.DataFrame:
    """Apply the stored select expressions whose columns are available.

    Delegates to ``apply_ready_exprs_kinds("s")``. Because ``df.select(expr)``
    replaces the DataFrame's columns, each later select expression sees the
    shape produced by the earlier ones. Expressions whose columns are missing
    stay pending.

    Returns
    -------
    The DataFrame returned by ``apply_ready_exprs_kinds("s")``.
    """
    selected = self.apply_ready_exprs_kinds("s")
    return selected
add_addcols
add_addcols(*exprs: Expr) -> None
Add one or more Polars with_columns expressions to the hopper.
These expressions are used in `df.with_columns(expr)`. Each expression
typically yields a column addition or overwrite, or just a column reference
(like `pl.col("foo").alias("bar")`).
Source code in src/polars_hopper/__init__.py
def add_addcols(self, *exprs: pl.Expr) -> None:
    """Queue one or more with_columns expressions in the hopper.

    Thin wrapper that forwards ``exprs`` to ``add_exprs`` with ``kind="a"``.
    These expressions are the ones intended for ``df.with_columns(expr)``,
    i.e. they add or overwrite a column.
    """
    addcol_kind = "a"
    self.add_exprs(*exprs, kind=addcol_kind)
list_addcols
list_addcols() -> list[pl.Expr]
Return the list of pending Polars with_columns expressions.
Source code in src/polars_hopper/__init__.py
def list_addcols(self) -> list[pl.Expr]:
    """Return the ``hopper_addcols`` list from the DataFrame's metadata.

    An empty list is returned when the key is absent.
    """
    metadata = self._df.config_meta.get_metadata()
    return metadata.get("hopper_addcols", [])
apply_ready_addcols
apply_ready_addcols() -> pl.DataFrame
Apply any stored with_columns expressions if columns exist.
We attempt each with_columns expression in turn. Because `df.with_columns(expr)`
adds columns to the DataFrame (or overwrites existing ones), you should be aware
that subsequent expressions apply to the new shape of the DataFrame.
If any required columns are missing, that expression remains pending.
Returns
A new DataFrame with the successfully added/overwritten columns.
Source code in src/polars_hopper/__init__.py
def apply_ready_addcols(self) -> pl.DataFrame:
    """Apply the stored with_columns expressions whose columns are available.

    Delegates to ``apply_ready_exprs_kinds("a")``. Each applied expression
    goes through ``df.with_columns(expr)``, so later expressions see the
    columns added or overwritten by earlier ones. Expressions whose columns
    are missing stay pending.

    Returns
    -------
    The DataFrame returned by ``apply_ready_exprs_kinds("a")``.
    """
    augmented = self.apply_ready_exprs_kinds("a")
    return augmented