blob: 6df203e82cd9d97f4ba478015b555a67c3b462cf [file] [log] [blame]
import pandas as pd
from hamilton.function_modifiers import does, extract_columns, parameterize, source, value
def _sum_series(**series):
    """Add together every keyword argument's value and return the total.

    The running total starts at the integer 0, so a call with no keyword
    arguments returns 0.

    :param series: Named values to add; only the values are used.
    :return: The sum of all supplied values.
    """
    total = 0
    for values in series.values():
        total = total + values
    return total
@extract_columns(
    "facebook_spend",
    "twitter_spend",
    "tv_spend",
    "youtube_spend",
    "radio_spend",
    "billboards_spend",
    "womens_churn",
    "mens_churn",
    "womens_signups",
    "mens_signups",
)
def joined_data(spend: pd.DataFrame, signups: pd.DataFrame, churn: pd.DataFrame) -> pd.DataFrame:
    """Combine the spend, churn and signups frames into one wide frame.

    Each input is indexed by its ``date`` column and every remaining column
    name gets a suffix (``_spend``, ``_churn`` or ``_signups``). The three
    results are then concatenated column-wise, in the order spend, churn,
    signups.

    :param spend: Frame with a ``date`` column plus one column per channel.
    :param signups: Frame with a ``date`` column plus signup columns.
    :param churn: Frame with a ``date`` column plus churn columns.
    :return: The column-wise concatenation of the three re-labelled frames.
    """

    def _index_and_tag(frame: pd.DataFrame, suffix: str) -> pd.DataFrame:
        # Index by date, then append the suffix to each column label.
        return frame.set_index("date").rename(columns=lambda name: name + suffix)

    frames = [
        _index_and_tag(spend, "_spend"),
        _index_and_tag(churn, "_churn"),
        _index_and_tag(signups, "_signups"),
    ]
    return pd.concat(frames, axis=1)
@does(_sum_series)
def total_marketing_spend(
    facebook_spend: pd.Series,
    twitter_spend: pd.Series,
    tv_spend: pd.Series,
    youtube_spend: pd.Series,
    radio_spend: pd.Series,
    billboards_spend: pd.Series,
) -> pd.Series:
    """Total marketing spend across the six per-channel spend series.

    The body is intentionally empty: the ``@does(_sum_series)`` decorator
    provides the computation from the parameters declared here.
    """
    pass
@does(_sum_series)
def total_signups(
    mens_signups: pd.Series,
    womens_signups: pd.Series,
) -> pd.Series:
    """Total signups: the men's and women's signup series combined.

    The body is intentionally empty: the ``@does(_sum_series)`` decorator
    provides the computation from the parameters declared here.
    """
    pass
@does(_sum_series)
def total_churn(
    mens_churn: pd.Series,
    womens_churn: pd.Series,
) -> pd.Series:
    """Total churn: the men's and women's churn series combined.

    The body is intentionally empty: the ``@does(_sum_series)`` decorator
    provides the computation from the parameters declared here.
    """
    pass
def total_customers(total_signups: pd.Series, total_churn: pd.Series) -> pd.Series:
    """Running customer count: the cumulative sum of signups plus churn.

    :param total_signups: Signups per row.
    :param total_churn: Churn per row.
    :return: Cumulative sum of ``total_signups + total_churn``.
    """
    # NOTE(review): churn is added, not subtracted. That is only correct if
    # the churn series holds negative values -- confirm against the data.
    return (total_signups + total_churn).cumsum()
def acquisition_cost(total_marketing_spend: pd.Series, total_signups: pd.Series) -> pd.Series:
    """Marketing spend per signup, computed element-wise.

    :param total_marketing_spend: Total spend per row.
    :param total_signups: Total signups per row.
    :return: ``total_marketing_spend`` divided by ``total_signups``.
    """
    cost_per_signup = total_marketing_spend / total_signups
    return cost_per_signup
@parameterize(
    twitter_spend_smoothed={"lookback_days": value(7), "spend": source("twitter_spend")},
    facebook_spend_smoothed={"lookback_days": value(7), "spend": source("facebook_spend")},
    radio_spend_smoothed={"lookback_days": value(21), "spend": source("radio_spend")},
    tv_spend_smoothed={"lookback_days": value(21), "spend": source("tv_spend")},
    billboards_spend_smoothed={"lookback_days": value(7), "spend": source("billboards_spend")},
    youtube_spend_smoothed={"lookback_days": value(7), "spend": source("youtube_spend")},
)
def spend_smoothed(lookback_days: int, spend: pd.Series) -> pd.Series:
    """{spend} smoothed by {lookback_days}. Might want to smooth different ad spends differently,
    figuring that it takes different amounts of time to get to the customer. A cheap hack at determining
    auto-correlation of a series -- this should be a parameter in a model,
    but this is to demonstrate the framework.

    The smoothing is a rolling mean over a window of ``lookback_days`` rows. Positions where
    the rolling mean is NaN (such as the rows before the first full window) are set to 0.

    :param lookback_days: Days to smooth over (used as the rolling window size, in rows)
    :param spend: Spend source
    :return: Rolling mean of ``spend`` with NaN results replaced by 0
    """
    rolling_mean = spend.rolling(window=lookback_days).mean()
    # Incomplete windows produce NaN; report those positions as 0.
    return rolling_mean.fillna(0)