# blob: 11dd58445a4147861553d729db0998ea4e2796f3 [file] [log] [blame]
# import importlib
import logging
import sys
import pandas as pd
from hamilton import driver
# Send log records to stdout.
logging.basicConfig(stream=sys.stdout)

# Starting data fed into the dataflow (loaded from actuals, or anywhere else).
# The values do not all have to be series; a scalar is acceptable too.
initial_columns = dict(
    signups=pd.Series([1, 10, 50, 100, 200, 400]),
    spend=pd.Series([10, 10, 20, 40, 40, 50]),
)

# Hamilton has to be told which module(s) hold the function definitions.
# A module can be loaded programmatically:
#     module_name = "my_functions"
#     my_functions = importlib.import_module(module_name)
# Here the module is simply imported directly.
import my_functions

# Further modules may be passed as additional arguments.
dr = driver.Driver(initial_columns, my_functions)

# The columns wanted in the final dataframe. Entries may be string names or
# function references; each reference below could equally be written as the
# function's name in quotes (e.g. "avg_3wk_spend").
output_columns = ["spend", "signups"]
output_columns += [
    my_functions.avg_3wk_spend,
    my_functions.spend_per_signup,
    my_functions.spend_zero_mean_unit_variance,
]

# Build the dataframe and show it in full.
df = dr.execute(output_columns)
print(df.to_string())

# Visualization needs `pip install "sf-hamilton[visualization]"` to work.
dr.visualize_execution(output_columns, "./my_dag.dot", dict(format="png"))
# To draw every function instead:
# dr.display_all_functions("./my_full_dag.dot", {"format": "png"})