blob: a75d8ade1cb16c93ec572d01323df932b7ba0e29 [file] [log] [blame]
from typing import Optional
import ibis
import ibis.expr.types as ir
def raw_table(raw_data_path: str) -> ir.Table:
"""Load CSV from `raw_data_path` into a Table expression
and format column names to snakecase
"""
return ibis.read_csv(sources=raw_data_path, table_name="absenteism").rename("snake_case")
def feature_table(raw_table: ir.Table) -> ir.Table:
"""Add to `raw_table` the feature columns `has_children`
`has_pet`, and `is_summer_brazil`
"""
return raw_table.mutate(
has_children=(ibis.ifelse(ibis._.son > 0, True, False)),
has_pet=ibis.ifelse(ibis._.pet > 0, True, False),
is_summer_brazil=ibis._.month_of_absence.isin([1, 2, 12]),
)
def feature_set(
feature_table: ir.Table,
feature_selection: list[str],
condition: Optional[ibis.common.deferred.Deferred] = None,
) -> ir.Table:
"""Select feature columns and filter rows"""
return feature_table[feature_selection].filter(condition)