| # Cell 1 - import the things you need |
| import logging |
| import sys |
| |
| import numpy as np |
| import pandas as pd |
| from hamilton import driver |
| from hamilton import base |
| |
| logging.basicConfig(stream=sys.stdout) |
| |
| # Cell 2 - import modules to create part of the DAG from |
| import my_functions |
| |
| |
| # Cell 3 - Define your new Hamilton functions & curate them into a TemporaryFunctionModule object. |
| # Look at `my_functions` to see how these functions connect. |
def signups() -> pd.Series:
    """Return the fixed sample series of sign-up values."""
    signup_values = [1, 10, 50, 100, 200, 400]
    return pd.Series(signup_values)
| |
| |
def spend() -> pd.Series:
    """Return the fixed sample series of spend values."""
    spend_values = [10, 10, 20, 40, 40, 50]
    return pd.Series(spend_values)
| |
| |
def log_spend_per_signup(spend_per_signup: pd.Series) -> pd.Series:
    """Return the element-wise natural logarithm of ``spend_per_signup``.

    Args:
        spend_per_signup: Series of spend-over-signups values. This is
            presumably supplied by the ``spend_per_signup`` function in
            ``my_functions`` -- verify there.

    Returns:
        A series holding ``np.log`` of each input element.
    """
    # NOTE(review): zero or negative inputs are not guarded against here;
    # np.log will produce -inf / NaN for them.
    logged = np.log(spend_per_signup)
    return logged
| |
| |
# Bundle the ad-hoc functions defined above into a single curated object that
# is handed to the Hamilton driver alongside regular modules. Each such object
# should be a curated set of functions; creating several of them is fine.
# NOTE(review): confirm `base.TemporaryFunctionModule` exists in the installed
# Hamilton version -- released versions may expose this helper as
# `ad_hoc_utils.create_temporary_module` instead.
temp_module = base.TemporaryFunctionModule(
    spend,
    signups,
    log_spend_per_signup,
)
| |
# Cell 4 - Build the Hamilton driver and request the outputs.
# Start from an empty configuration.
initial_config = {}
# The driver loads function definitions from every module passed to it;
# several modules can be supplied.
dr = driver.Driver(
    initial_config,
    my_functions,
    temp_module,
)
# Names of the outputs wanted as columns of the final dataframe.
output_columns = [
    "spend",
    "signups",
    "avg_3wk_spend",
    "spend_per_signup",
    "spend_zero_mean_unit_variance",
    "log_spend_per_signup",
]
# Execute the DAG for the requested outputs and print the full result.
df = dr.execute(output_columns)
print(df.to_string())
| |
| # To visualize do `pip install sf-hamilton[visualization]` if you want these to work |
| # dr.visualize_execution(output_columns, './my_dag.dot', {}) |
| # dr.display_all_functions('./my_full_dag.dot') |