# blob: 73f97a2ac73701fad37b594c61882a436783a180 [file] [log] [blame]
import importlib
import ray
from hamilton import base, driver, log_setup
from hamilton.experimental import h_ray
if __name__ == "__main__":
    # Script entry point: configure logging, start Ray, build a Hamilton driver
    # over the modules listed below using the Ray graph adapter, execute it for
    # the requested outputs, and print the result.
    log_setup.setup_logging()
    ray.init()
    try:
        # Everything after ray.init() runs under try/finally so ray.shutdown()
        # is still called if a module import, driver construction, or the
        # execution itself raises.
        module_names = [
            "data_loaders",  # functions to help load data
            "business_logic",  # where our important logic lives
        ]
        modules = [importlib.import_module(m) for m in module_names]
        # could load data here via some other means, or delegate to a module as we have done.
        initial_columns = {
            # 'signups': pd.Series([1, 10, 50, 100, 200, 400]),
            "signups_location": "some_path",
            # 'spend': pd.Series([10, 10, 20, 40, 40, 50]),
            "spend_location": "some_other_path",
        }
        rga = h_ray.RayGraphAdapter(result_builder=base.PandasDataFrameResult())
        dr = driver.Driver(initial_columns, *modules, adapter=rga)  # can pass in multiple modules
        # we need to specify what we want in the final dataframe.
        output_columns = [
            "spend",
            "signups",
            "avg_3wk_spend",
            "spend_per_signup",
            "spend_zero_mean_unit_variance",
        ]
        # let's create the dataframe!
        df = dr.execute(output_columns)
        # To visualize do `pip install "sf-hamilton[visualization]"` if you want these to work
        # dr.visualize_execution(output_columns, './my_dag.dot', {})
        # dr.display_all_functions('./my_full_dag.dot')
        print(df.to_string())
    finally:
        ray.shutdown()