| from typing import Optional |
| |
| import ibis |
| import ibis.expr.types as ir |
| |
| from hamilton.function_modifiers import extract_columns |
| from hamilton.plugins import ibis_extensions # noqa: F401 |
| |
| |
| # extract specific columns from the table |
| @extract_columns("son", "pet", "month_of_absence") |
| def raw_table(raw_data_path: str) -> ir.Table: |
| """Load the CSV found at `raw_data_path` into a Table expression |
| and format columns to snakecase |
| """ |
| return ibis.read_csv(sources=raw_data_path, table_name="absenteism").rename("snake_case") |
| |
| |
| # accesses a single column from `raw_table` |
| def has_children(son: ir.Column) -> ir.BooleanColumn: |
| """True if someone has any children""" |
| return ibis.ifelse(son > 0, True, False) |
| |
| |
| # narrows the return type from `ir.Column` to `ir.BooleanColumn` |
| def has_pet(pet: ir.Column) -> ir.BooleanColumn: |
| """True if someone has any pets""" |
| return ibis.ifelse(pet > 0, True, False).cast(bool) |
| |
| |
| # typing and docstring provides business context to features |
| def is_summer_brazil(month_of_absence: ir.Column) -> ir.BooleanColumn: |
| """True if it is summer in Brazil during this month |
| |
| People in the northern hemisphere are likely to take vacations |
| to warm places when it's cold locally |
| """ |
| return month_of_absence.isin([1, 2, 12]) |
| |
| |
| def feature_table( |
| raw_table: ir.Table, |
| has_children: ir.BooleanColumn, |
| has_pet: ir.BooleanColumn, |
| is_summer_brazil: ir.BooleanColumn, |
| ) -> ir.Table: |
| """Join computed features to the `raw_data` table""" |
| return raw_table.mutate( |
| has_children=has_children, |
| has_pet=has_pet, |
| is_summer_brazil=is_summer_brazil, |
| ) |
| |
| |
| def feature_set( |
| feature_table: ir.Table, |
| feature_selection: list[str], |
| condition: Optional[ibis.common.deferred.Deferred] = None, |
| ) -> ir.Table: |
| """Select feature columns and filter rows""" |
| return feature_table[feature_selection].filter(condition) |