blob: da8cefcba30d66830bbe2167e193b3792a2ce39d [file]
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# update the pandas package - optional\n",
"# %pip install --upgrade pandas\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.890006Z",
"start_time": "2023-09-17T05:43:33.866292Z"
}
},
"outputs": [],
"source": [
"import sqlite3\n",
"import sys\n",
"import pandas as pd\n",
"\n",
"# Add the hamilton module to your path - optional\n",
"# project_dir = \"### ADD PATH HERE ###\"\n",
"# sys.path.append(project_dir)\n",
"\n",
"from hamilton import base, driver\n",
"from hamilton.io.materialization import to"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.890336Z",
"start_time": "2023-09-17T05:43:33.869093Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# We use the autoreload extension that comes with ipython to automatically reload modules when\n",
"# the code in them changes.\n",
"\n",
"# import the jupyter extension\n",
"%load_ext autoreload\n",
"# mode 1: only reload the modules that were registered with %aimport\n",
"%autoreload 1"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.930823Z",
"start_time": "2023-09-17T05:43:33.875941Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting spend_calculations.py\n"
]
}
],
"source": [
"%%writefile spend_calculations.py\n",
"# Define your new Hamilton functions.\n",
"# The %%writefile magic command creates a new Python module with the functions below.\n",
"# We will import this later and pass it into our Driver.\n",
"\n",
"import pandas as pd\n",
" \n",
"# Look at `my_functions` to see how these functions connect.\n",
"def avg_3wk_spend(spend: pd.Series) -> pd.Series:\n",
" \"\"\"Rolling 3 week average spend.\"\"\"\n",
" return spend.rolling(3).mean()\n",
"\n",
"\n",
"def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:\n",
" \"\"\"The cost per signup in relation to spend.\"\"\"\n",
" return spend / signups\n",
"\n",
"\n",
"def spend_mean(spend: pd.Series) -> float:\n",
" \"\"\"Shows function creating a scalar. In this case it computes the mean of the entire column.\"\"\"\n",
" return spend.mean()\n",
"\n",
"\n",
"def spend_zero_mean(spend: pd.Series, spend_mean: float) -> pd.Series:\n",
" \"\"\"Shows function that takes a scalar. In this case to zero mean spend.\"\"\"\n",
" return spend - spend_mean\n",
"\n",
"\n",
"def spend_std_dev(spend: pd.Series) -> float:\n",
" \"\"\"Function that computes the standard deviation of the spend column.\"\"\"\n",
" return spend.std()\n",
"\n",
"\n",
"def spend_zero_mean_unit_variance(spend_zero_mean: pd.Series, spend_std_dev: float) -> pd.Series:\n",
" \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n",
" return spend_zero_mean / spend_std_dev"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.931096Z",
"start_time": "2023-09-17T05:43:33.881858Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"initial_columns = { # load from actuals or wherever -- this is our initial data we use as input.\n",
" # Note: these values don't have to be all series, they could be a scalar.\n",
" \"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n",
" \"spend\": pd.Series([10, 10, 20, 40, 40, 50]),\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.932468Z",
"start_time": "2023-09-17T05:43:33.887774Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n"
]
}
],
"source": [
"%aimport spend_calculations\n",
"\n",
"df_builder = base.PandasDataFrameResult()\n",
"dr = driver.Driver({}, spend_calculations) # can pass in multiple modules"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.932900Z",
"start_time": "2023-09-17T05:43:33.892681Z"
}
},
"outputs": [],
"source": [
"# we need to specify what we want in the final dataframe. These can be string names, or function references.\n",
"output_columns = [\n",
" \"spend\",\n",
" \"signups\",\n",
" \"avg_3wk_spend\",\n",
" \"spend_per_signup\",\n",
" \"spend_zero_mean_unit_variance\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:33.933121Z",
"start_time": "2023-09-17T05:43:33.896779Z"
}
},
"outputs": [],
"source": [
"# Open the SQLite database file that the SQL materializer below writes to.\n",
"conn = sqlite3.connect(\"df.db\")\n",
"\n",
"# Drop any previous instance of the 'test' table, which will be created next.\n",
"conn.execute(\"DROP TABLE IF EXISTS test;\")\n",
"conn.commit()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.014270Z",
"start_time": "2023-09-17T05:43:33.912979Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"More than one applicable adapter detected for <class 'pandas.core.frame.DataFrame'>. Using the last one registered <class 'hamilton.plugins.pandas_extensions.PandasPickleWriter'>.\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 2.43.0 (0)\n",
" -->\n",
"<!-- Title: %3 Pages: 1 -->\n",
"<svg width=\"2561pt\" height=\"404pt\"\n",
" viewBox=\"0.00 0.00 2561.23 404.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 400)\">\n",
"<title>%3</title>\n",
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-400 2557.23,-400 2557.23,4 -4,4\"/>\n",
"<!-- df_to_orc_build_result -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>df_to_orc_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"112.44\" cy=\"-90\" rx=\"112.38\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"112.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_orc_build_result</text>\n",
"</g>\n",
"<!-- df_to_orc -->\n",
"<g id=\"node19\" class=\"node\">\n",
"<title>df_to_orc</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"152.94,-36 71.94,-36 71.94,0 152.94,0 152.94,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"112.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_orc</text>\n",
"</g>\n",
"<!-- df_to_orc_build_result&#45;&gt;df_to_orc -->\n",
"<g id=\"edge48\" class=\"edge\">\n",
"<title>df_to_orc_build_result&#45;&gt;df_to_orc</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M112.44,-71.7C112.44,-63.98 112.44,-54.71 112.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"115.94,-46.1 112.44,-36.1 108.94,-46.1 115.94,-46.1\"/>\n",
"</g>\n",
"<!-- spend_per_signup -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>spend_per_signup</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1087.94,-180 942.94,-180 942.94,-144 1087.94,-144 1087.94,-180\"/>\n",
"<text text-anchor=\"middle\" x=\"1015.44\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_per_signup</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_orc_build_result -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_orc_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M942.79,-157.82C804.46,-151.39 493.81,-135.08 233.44,-108 222.94,-106.91 211.95,-105.6 201.06,-104.2\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"201.47,-100.72 191.1,-102.89 200.55,-107.66 201.47,-100.72\"/>\n",
"</g>\n",
"<!-- df_to_html_build_result -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>df_to_html_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1114.44\" cy=\"-90\" rx=\"118.88\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1114.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_html_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_html_build_result -->\n",
"<g id=\"edge11\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_html_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1039.66,-143.88C1052.55,-134.76 1068.58,-123.43 1082.48,-113.6\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1084.67,-116.34 1090.81,-107.71 1080.63,-110.62 1084.67,-116.34\"/>\n",
"</g>\n",
"<!-- df_to_stata_build_result -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>df_to_stata_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"363.44\" cy=\"-90\" rx=\"120.78\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"363.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_stata_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_stata_build_result -->\n",
"<g id=\"edge19\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_stata_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M942.74,-154.29C841.82,-144.81 653.59,-126.6 493.44,-108 482.51,-106.73 471.07,-105.33 459.71,-103.91\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"460.12,-100.43 449.76,-102.64 459.24,-107.37 460.12,-100.43\"/>\n",
"</g>\n",
"<!-- df_to_json_build_result -->\n",
"<g id=\"node13\" class=\"node\">\n",
"<title>df_to_json_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"861.44\" cy=\"-90\" rx=\"116.18\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"861.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge28\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M977.77,-143.88C956.26,-134.1 929.16,-121.78 906.53,-111.5\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"907.69,-108.18 897.14,-107.23 904.79,-114.55 907.69,-108.18\"/>\n",
"</g>\n",
"<!-- df_to_csv_build_result -->\n",
"<g id=\"node15\" class=\"node\">\n",
"<title>df_to_csv_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"614.44\" cy=\"-90\" rx=\"112.38\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"614.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_csv_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_csv_build_result -->\n",
"<g id=\"edge35\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_csv_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M942.82,-148.32C873.74,-136.26 769.88,-118.13 697.53,-105.5\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"697.99,-102.03 687.53,-103.76 696.78,-108.93 697.99,-102.03\"/>\n",
"</g>\n",
"<!-- df_to_feather_build_result -->\n",
"<g id=\"node16\" class=\"node\">\n",
"<title>df_to_feather_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1382.44\" cy=\"-90\" rx=\"131.08\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1382.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge40\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1087.95,-147.17C1148.72,-135.58 1235.52,-119.02 1299.14,-106.89\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1299.97,-110.29 1309.14,-104.98 1298.66,-103.42 1299.97,-110.29\"/>\n",
"</g>\n",
"<!-- df_to_sql_build_result -->\n",
"<g id=\"node17\" class=\"node\">\n",
"<title>df_to_sql_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1642.44\" cy=\"-90\" rx=\"111.28\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1642.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_sql_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_sql_build_result -->\n",
"<g id=\"edge45\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_sql_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1088.19,-153.1C1114.99,-150.21 1145.59,-146.93 1173.44,-144 1328.52,-127.66 1367.66,-126.94 1522.44,-108 1532.31,-106.79 1542.62,-105.45 1552.88,-104.07\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1553.67,-107.49 1563.1,-102.67 1552.72,-100.56 1553.67,-107.49\"/>\n",
"</g>\n",
"<!-- df_to_parquet_build_result -->\n",
"<g id=\"node20\" class=\"node\">\n",
"<title>df_to_parquet_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1905.44\" cy=\"-90\" rx=\"133.78\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1905.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge52\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1088.13,-152.47C1114.92,-149.51 1145.54,-146.35 1173.44,-144 1434.78,-121.97 1501.23,-131.49 1762.44,-108 1775.14,-106.86 1788.45,-105.5 1801.64,-104.05\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1802.13,-107.51 1811.68,-102.93 1801.35,-100.56 1802.13,-107.51\"/>\n",
"</g>\n",
"<!-- df_to_xml_build_result -->\n",
"<g id=\"node22\" class=\"node\">\n",
"<title>df_to_xml_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"2171.44\" cy=\"-90\" rx=\"114.28\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"2171.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_xml_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_xml_build_result -->\n",
"<g id=\"edge59\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_xml_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1088.11,-152.16C1114.9,-149.18 1145.51,-146.07 1173.44,-144 1561.59,-115.17 1660.77,-142.68 2048.44,-108 2059.48,-107.01 2071.05,-105.73 2082.5,-104.32\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2083.02,-107.79 2092.5,-103.05 2082.14,-100.84 2083.02,-107.79\"/>\n",
"</g>\n",
"<!-- df_to_pickle_build_result -->\n",
"<g id=\"node23\" class=\"node\">\n",
"<title>df_to_pickle_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"2428.44\" cy=\"-90\" rx=\"124.58\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"2428.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_pickle_build_result</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_pickle_build_result -->\n",
"<g id=\"edge64\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_pickle_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1088.1,-152.03C1114.88,-149.02 1145.51,-145.95 1173.44,-144 1670.71,-109.31 1797.52,-147.41 2294.44,-108 2306.71,-107.03 2319.57,-105.74 2332.28,-104.31\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2332.89,-107.76 2342.42,-103.13 2332.09,-100.8 2332.89,-107.76\"/>\n",
"</g>\n",
"<!-- spend -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>spend</title>\n",
"<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1356.94,-396 1249.94,-396 1249.94,-360 1356.94,-360 1356.94,-396\"/>\n",
"<text text-anchor=\"middle\" x=\"1303.44\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_orc_build_result -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_orc_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1249.91,-375.91C1153.24,-373 943.56,-362.41 771.44,-324 533.41,-270.88 262.96,-157.56 157.7,-111.3\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"158.89,-108 148.33,-107.17 156.06,-114.41 158.89,-108\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_per_signup -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_per_signup</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1249.86,-365.81C1189.83,-350.48 1093.82,-317.09 1039.44,-252 1024.96,-234.67 1019.15,-209.41 1016.86,-190.22\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1020.31,-189.63 1015.89,-180.01 1013.35,-190.29 1020.31,-189.63\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_html_build_result -->\n",
"<g id=\"edge8\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_html_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1249.5,-372.88C1201.49,-367.3 1130.68,-354.28 1076.44,-324 997.68,-280.04 967.81,-263.39 933.44,-180 927.34,-165.21 923.85,-156.81 933.44,-144 945.5,-127.9 987.68,-115.05 1028.29,-106.03\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1029.24,-109.41 1038.28,-103.88 1027.77,-102.56 1029.24,-109.41\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_stata_build_result -->\n",
"<g id=\"edge16\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_stata_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1249.81,-375.35C1168.66,-371.73 1010.47,-360.24 881.44,-324 692.48,-270.93 484.22,-159.53 400.9,-112.59\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"402.49,-109.47 392.06,-107.59 399.04,-115.56 402.49,-109.47\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend -->\n",
"<g id=\"node10\" class=\"node\">\n",
"<title>avg_3wk_spend</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1310.44,-180 1182.44,-180 1182.44,-144 1310.44,-144 1310.44,-180\"/>\n",
"<text text-anchor=\"middle\" x=\"1246.44\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">avg_3wk_spend</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;avg_3wk_spend -->\n",
"<g id=\"edge22\" class=\"edge\">\n",
"<title>spend&#45;&gt;avg_3wk_spend</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1256.4,-359.97C1240.49,-351.7 1224.48,-339.99 1215.44,-324 1191.22,-281.14 1213.87,-222.72 1231.12,-189.32\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1234.37,-190.66 1236.02,-180.2 1228.2,-187.35 1234.37,-190.66\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge25\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1249.82,-377.38C1188.67,-375.6 1087.47,-365.8 1012.44,-324 939.91,-283.59 932.83,-253.09 893.44,-180 882.71,-160.09 874.26,-135.84 868.75,-117.69\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"872.09,-116.65 865.92,-108.04 865.37,-118.62 872.09,-116.65\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_csv_build_result -->\n",
"<g id=\"edge32\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_csv_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1249.84,-376.01C1180.35,-373.13 1056.46,-362.46 958.44,-324 826.87,-272.38 695.18,-163.28 640.63,-114.93\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"642.64,-112.03 632.85,-107.98 637.98,-117.25 642.64,-112.03\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge37\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1357.12,-373.16C1424.59,-367.16 1534.94,-353.11 1562.44,-324 1617.75,-265.45 1632.6,-207.8 1583.44,-144 1570.16,-126.76 1524.76,-114 1480.47,-105.32\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1480.93,-101.85 1470.45,-103.42 1479.63,-108.73 1480.93,-101.85\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_sql_build_result -->\n",
"<g id=\"edge42\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_sql_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1357.18,-372.9C1429.91,-366.39 1554.71,-351.51 1590.44,-324 1653.47,-275.48 1651.22,-168.76 1646.14,-118.37\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1649.6,-117.78 1645.01,-108.23 1642.64,-118.56 1649.6,-117.78\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge49\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1357.11,-376.22C1421.81,-373.45 1532.52,-362.81 1618.44,-324 1732.57,-272.44 1839.46,-164.1 1883.84,-115.5\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1886.55,-117.73 1890.66,-107.96 1881.36,-113.03 1886.55,-117.73\"/>\n",
"</g>\n",
"<!-- spend_zero_mean -->\n",
"<g id=\"node21\" class=\"node\">\n",
"<title>spend_zero_mean</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1468.44\" cy=\"-234\" rx=\"92.88\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1468.44\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_zero_mean -->\n",
"<g id=\"edge54\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_zero_mean</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1357.02,-372.93C1420.98,-366.84 1522.25,-352.82 1545.44,-324 1555.47,-311.53 1552.88,-302.16 1545.44,-288 1538.45,-274.69 1526.43,-264.08 1513.95,-255.95\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1515.65,-252.89 1505.29,-250.71 1512.03,-258.88 1515.65,-252.89\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_xml_build_result -->\n",
"<g id=\"edge56\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_xml_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1357.23,-373.85C1428.5,-368.61 1557.59,-355.37 1663.44,-324 1848.19,-269.25 2052.02,-159.22 2134.14,-112.63\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2136.12,-115.53 2143.08,-107.54 2132.66,-109.45 2136.12,-115.53\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_pickle_build_result -->\n",
"<g id=\"edge61\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_pickle_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1357.16,-373.74C1438.52,-368.03 1597.51,-353.88 1729.44,-324 1979.77,-267.29 2267.11,-156.34 2379.65,-111.01\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2381.08,-114.21 2389.04,-107.22 2378.46,-107.72 2381.08,-114.21\"/>\n",
"</g>\n",
"<!-- spend_mean -->\n",
"<g id=\"node26\" class=\"node\">\n",
"<title>spend_mean</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1468.44\" cy=\"-306\" rx=\"68.49\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1468.44\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_mean</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_mean -->\n",
"<g id=\"edge67\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_mean</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1343.8,-359.88C1368.34,-349.47 1399.67,-336.18 1424.78,-325.52\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1426.23,-328.71 1434.07,-321.58 1423.5,-322.27 1426.23,-328.71\"/>\n",
"</g>\n",
"<!-- spend_std_dev -->\n",
"<g id=\"node27\" class=\"node\">\n",
"<title>spend_std_dev</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"1303.44\" cy=\"-306\" rx=\"78.79\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"1303.44\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_std_dev</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_std_dev -->\n",
"<g id=\"edge68\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_std_dev</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1303.44,-359.7C1303.44,-351.98 1303.44,-342.71 1303.44,-334.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1306.94,-334.1 1303.44,-324.1 1299.94,-334.1 1306.94,-334.1\"/>\n",
"</g>\n",
"<!-- df_to_html -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>df_to_html</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1160.44,-36 1068.44,-36 1068.44,0 1160.44,0 1160.44,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"1114.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_html</text>\n",
"</g>\n",
"<!-- df_to_html_build_result&#45;&gt;df_to_html -->\n",
"<g id=\"edge14\" class=\"edge\">\n",
"<title>df_to_html_build_result&#45;&gt;df_to_html</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1114.44,-71.7C1114.44,-63.98 1114.44,-54.71 1114.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1117.94,-46.1 1114.44,-36.1 1110.94,-46.1 1117.94,-46.1\"/>\n",
"</g>\n",
"<!-- df_to_sql -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>df_to_sql</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1682.44,-36 1602.44,-36 1602.44,0 1682.44,0 1682.44,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"1642.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_sql</text>\n",
"</g>\n",
"<!-- df_to_feather -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>df_to_feather</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1437.44,-36 1327.44,-36 1327.44,0 1437.44,0 1437.44,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"1382.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather</text>\n",
"</g>\n",
"<!-- df_to_stata -->\n",
"<g id=\"node25\" class=\"node\">\n",
"<title>df_to_stata</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"410.94,-36 315.94,-36 315.94,0 410.94,0 410.94,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"363.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_stata</text>\n",
"</g>\n",
"<!-- df_to_stata_build_result&#45;&gt;df_to_stata -->\n",
"<g id=\"edge66\" class=\"edge\">\n",
"<title>df_to_stata_build_result&#45;&gt;df_to_stata</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M363.44,-71.7C363.44,-63.98 363.44,-54.71 363.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"366.94,-46.1 363.44,-36.1 359.94,-46.1 366.94,-46.1\"/>\n",
"</g>\n",
"<!-- df_to_csv -->\n",
"<g id=\"node9\" class=\"node\">\n",
"<title>df_to_csv</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"655.44,-36 573.44,-36 573.44,0 655.44,0 655.44,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"614.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_csv</text>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_orc_build_result -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_orc_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1182.3,-152.71C1155.77,-149.55 1124.66,-146.18 1096.44,-144 713.7,-114.37 615.75,-142.75 233.44,-108 222.68,-107.02 211.42,-105.76 200.28,-104.37\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"200.46,-100.86 190.09,-103.06 199.56,-107.81 200.46,-100.86\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_html_build_result -->\n",
"<g id=\"edge10\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_html_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1214.15,-143.88C1196.18,-134.35 1173.65,-122.4 1154.55,-112.27\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1155.96,-109.06 1145.49,-107.47 1152.68,-115.24 1155.96,-109.06\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_stata_build_result -->\n",
"<g id=\"edge18\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_stata_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1182.28,-152.97C1155.74,-149.84 1124.64,-146.44 1096.44,-144 828.96,-120.87 760.71,-133.42 493.44,-108 481.93,-106.91 469.88,-105.58 457.94,-104.15\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"458.31,-100.67 447.96,-102.93 457.46,-107.62 458.31,-100.67\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge27\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1182.43,-149.36C1116.98,-137.46 1015.06,-118.93 943.67,-105.95\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"944.27,-102.5 933.8,-104.16 943.02,-109.39 944.27,-102.5\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_csv_build_result -->\n",
"<g id=\"edge34\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_csv_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1182.23,-153.53C1155.68,-150.5 1124.59,-147 1096.44,-144 936.55,-126.95 896.1,-127.1 736.44,-108 726.22,-106.78 715.53,-105.41 704.92,-104.01\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"705.15,-100.51 694.77,-102.64 704.22,-107.44 705.15,-100.51\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge39\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1279.71,-143.88C1298.31,-134.3 1321.65,-122.29 1341.38,-112.13\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1343.16,-115.15 1350.45,-107.47 1339.96,-108.93 1343.16,-115.15\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_sql_build_result -->\n",
"<g id=\"edge44\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_sql_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1310.48,-145.83C1313.51,-145.2 1316.5,-144.58 1319.44,-144 1397.75,-128.47 1487.38,-114.03 1552.33,-104.15\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1553.17,-107.56 1562.53,-102.61 1552.12,-100.64 1553.17,-107.56\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge51\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1310.83,-145.46C1313.73,-144.93 1316.61,-144.44 1319.44,-144 1514.61,-113.52 1565.88,-127.66 1762.44,-108 1774.81,-106.76 1787.76,-105.37 1800.61,-103.92\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1801.34,-107.36 1810.88,-102.74 1800.55,-100.4 1801.34,-107.36\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_xml_build_result -->\n",
"<g id=\"edge58\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_xml_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1310.81,-145.35C1313.72,-144.85 1316.6,-144.4 1319.44,-144 1640.7,-99.03 1725.46,-138.24 2048.44,-108 2059.4,-106.97 2070.87,-105.68 2082.23,-104.27\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2082.68,-107.74 2092.16,-103 2081.8,-100.8 2082.68,-107.74\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_pickle_build_result -->\n",
"<g id=\"edge63\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_pickle_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1310.8,-145.31C1313.71,-144.82 1316.6,-144.38 1319.44,-144 1749.25,-86.56 1862.25,-143.32 2294.44,-108 2306.7,-107 2319.57,-105.69 2332.28,-104.25\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2332.89,-107.7 2342.42,-103.07 2332.08,-100.75 2332.89,-107.7\"/>\n",
"</g>\n",
"<!-- df_to_json -->\n",
"<g id=\"node11\" class=\"node\">\n",
"<title>df_to_json</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"905.44,-36 817.44,-36 817.44,0 905.44,0 905.44,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"861.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json</text>\n",
"</g>\n",
"<!-- df_to_parquet -->\n",
"<g id=\"node12\" class=\"node\">\n",
"<title>df_to_parquet</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1962.94,-36 1847.94,-36 1847.94,0 1962.94,0 1962.94,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"1905.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet</text>\n",
"</g>\n",
"<!-- df_to_json_build_result&#45;&gt;df_to_json -->\n",
"<g id=\"edge23\" class=\"edge\">\n",
"<title>df_to_json_build_result&#45;&gt;df_to_json</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M861.44,-71.7C861.44,-63.98 861.44,-54.71 861.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"864.94,-46.1 861.44,-36.1 857.94,-46.1 864.94,-46.1\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance -->\n",
"<g id=\"node14\" class=\"node\">\n",
"<title>spend_zero_mean_unit_variance</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"1573.94,-180 1328.94,-180 1328.94,-144 1573.94,-144 1573.94,-180\"/>\n",
"<text text-anchor=\"middle\" x=\"1451.44\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean_unit_variance</text>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_orc_build_result -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_orc_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1328.87,-144.81C1325.7,-144.52 1322.55,-144.25 1319.44,-144 838.08,-105.11 714.57,-149.65 233.44,-108 222.6,-107.06 211.24,-105.82 200.02,-104.43\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"200.12,-100.91 189.76,-103.11 199.23,-107.85 200.12,-100.91\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_html_build_result -->\n",
"<g id=\"edge12\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_html_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1369.43,-143.97C1315.25,-132.71 1244.71,-118.06 1191.56,-107.02\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1192.01,-103.54 1181.51,-104.93 1190.58,-110.39 1192.01,-103.54\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_stata_build_result -->\n",
"<g id=\"edge20\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_stata_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1328.87,-144.86C1325.69,-144.55 1322.55,-144.27 1319.44,-144 953.33,-112.5 859.52,-139.85 493.44,-108 481.75,-106.98 469.49,-105.69 457.37,-104.26\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"457.59,-100.76 447.24,-103.04 456.75,-107.71 457.59,-100.76\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge29\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1328.84,-145.12C1325.68,-144.74 1322.54,-144.37 1319.44,-144 1171.61,-126.49 1134.21,-126.03 986.44,-108 976.23,-106.75 965.55,-105.39 954.93,-104\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"955.15,-100.5 944.77,-102.66 954.23,-107.44 955.15,-100.5\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_csv_build_result -->\n",
"<g id=\"edge36\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_csv_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1328.86,-144.93C1325.69,-144.61 1322.55,-144.3 1319.44,-144 1061,-119.41 994.75,-133.93 736.44,-108 725.77,-106.93 714.6,-105.62 703.53,-104.22\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"703.78,-100.72 693.41,-102.91 702.88,-107.67 703.78,-100.72\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge41\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1434.38,-143.7C1425.94,-135.14 1415.62,-124.66 1406.4,-115.3\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1408.81,-112.77 1399.3,-108.1 1403.83,-117.68 1408.81,-112.77\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_sql_build_result -->\n",
"<g id=\"edge46\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_sql_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1498.16,-143.88C1526.05,-133.66 1561.51,-120.66 1590.3,-110.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1591.6,-113.36 1599.79,-106.63 1589.19,-106.79 1591.6,-113.36\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge53\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1561.92,-143.97C1638.13,-132.22 1738.37,-116.76 1810.89,-105.58\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1811.58,-109.01 1820.93,-104.03 1810.51,-102.1 1811.58,-109.01\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_xml_build_result -->\n",
"<g id=\"edge60\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_xml_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1573.96,-151.88C1694.76,-142.48 1884.57,-126.59 2048.44,-108 2058.75,-106.83 2069.53,-105.5 2080.23,-104.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2081,-107.54 2090.46,-102.76 2080.09,-100.6 2081,-107.54\"/>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_pickle_build_result -->\n",
"<g id=\"edge65\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_pickle_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1574.14,-155.73C1738.41,-148.11 2038.78,-132.23 2294.44,-108 2306.2,-106.89 2318.53,-105.55 2330.74,-104.13\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2331.43,-107.57 2340.95,-102.91 2330.61,-100.62 2331.43,-107.57\"/>\n",
"</g>\n",
"<!-- df_to_csv_build_result&#45;&gt;df_to_csv -->\n",
"<g id=\"edge21\" class=\"edge\">\n",
"<title>df_to_csv_build_result&#45;&gt;df_to_csv</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M614.44,-71.7C614.44,-63.98 614.44,-54.71 614.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"617.94,-46.1 614.44,-36.1 610.94,-46.1 617.94,-46.1\"/>\n",
"</g>\n",
"<!-- df_to_feather_build_result&#45;&gt;df_to_feather -->\n",
"<g id=\"edge15\" class=\"edge\">\n",
"<title>df_to_feather_build_result&#45;&gt;df_to_feather</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1382.44,-71.7C1382.44,-63.98 1382.44,-54.71 1382.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1385.94,-46.1 1382.44,-36.1 1378.94,-46.1 1385.94,-46.1\"/>\n",
"</g>\n",
"<!-- df_to_sql_build_result&#45;&gt;df_to_sql -->\n",
"<g id=\"edge13\" class=\"edge\">\n",
"<title>df_to_sql_build_result&#45;&gt;df_to_sql</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1642.44,-71.7C1642.44,-63.98 1642.44,-54.71 1642.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1645.94,-46.1 1642.44,-36.1 1638.94,-46.1 1645.94,-46.1\"/>\n",
"</g>\n",
"<!-- df_to_pickle -->\n",
"<g id=\"node18\" class=\"node\">\n",
"<title>df_to_pickle</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"2478.94,-36 2377.94,-36 2377.94,0 2478.94,0 2478.94,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"2428.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_pickle</text>\n",
"</g>\n",
"<!-- df_to_parquet_build_result&#45;&gt;df_to_parquet -->\n",
"<g id=\"edge24\" class=\"edge\">\n",
"<title>df_to_parquet_build_result&#45;&gt;df_to_parquet</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1905.44,-71.7C1905.44,-63.98 1905.44,-54.71 1905.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1908.94,-46.1 1905.44,-36.1 1901.94,-46.1 1908.94,-46.1\"/>\n",
"</g>\n",
"<!-- spend_zero_mean&#45;&gt;spend_zero_mean_unit_variance -->\n",
"<g id=\"edge30\" class=\"edge\">\n",
"<title>spend_zero_mean&#45;&gt;spend_zero_mean_unit_variance</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1464.24,-215.7C1462.34,-207.9 1460.06,-198.51 1457.96,-189.83\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1461.36,-189 1455.59,-180.1 1454.55,-190.65 1461.36,-189\"/>\n",
"</g>\n",
"<!-- df_to_xml -->\n",
"<g id=\"node28\" class=\"node\">\n",
"<title>df_to_xml</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"2213.94,-36 2128.94,-36 2128.94,0 2213.94,0 2213.94,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"2171.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_xml</text>\n",
"</g>\n",
"<!-- df_to_xml_build_result&#45;&gt;df_to_xml -->\n",
"<g id=\"edge69\" class=\"edge\">\n",
"<title>df_to_xml_build_result&#45;&gt;df_to_xml</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M2171.44,-71.7C2171.44,-63.98 2171.44,-54.71 2171.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2174.94,-46.1 2171.44,-36.1 2167.94,-46.1 2174.94,-46.1\"/>\n",
"</g>\n",
"<!-- df_to_pickle_build_result&#45;&gt;df_to_pickle -->\n",
"<g id=\"edge47\" class=\"edge\">\n",
"<title>df_to_pickle_build_result&#45;&gt;df_to_pickle</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M2428.44,-71.7C2428.44,-63.98 2428.44,-54.71 2428.44,-46.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2431.94,-46.1 2428.44,-36.1 2424.94,-46.1 2431.94,-46.1\"/>\n",
"</g>\n",
"<!-- signups -->\n",
"<g id=\"node24\" class=\"node\">\n",
"<title>signups</title>\n",
"<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1167.94,-252 1048.94,-252 1048.94,-216 1167.94,-216 1167.94,-252\"/>\n",
"<text text-anchor=\"middle\" x=\"1108.44\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups</text>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_orc_build_result -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_orc_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1048.94,-228.25C954.03,-220.35 762.64,-203.06 601.44,-180 454.38,-158.96 283.97,-125.97 188.12,-106.62\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"188.72,-103.17 178.22,-104.61 187.33,-110.03 188.72,-103.17\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;spend_per_signup -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>signups&#45;&gt;spend_per_signup</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1085.45,-215.7C1073.63,-206.8 1059.05,-195.82 1046.26,-186.2\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1048.26,-183.32 1038.16,-180.1 1044.05,-188.91 1048.26,-183.32\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_html_build_result -->\n",
"<g id=\"edge9\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_html_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1109.16,-215.87C1110.19,-191.67 1112.06,-147.21 1113.28,-118.39\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1116.79,-118.33 1113.71,-108.19 1109.79,-118.03 1116.79,-118.33\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_stata_build_result -->\n",
"<g id=\"edge17\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_stata_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1048.84,-224.57C981.89,-214.86 870.09,-197.93 774.44,-180 654.92,-157.6 516.95,-126.67 435.13,-107.8\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"435.72,-104.34 425.19,-105.5 434.15,-111.16 435.72,-104.34\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge26\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1048.69,-224.04C1012.96,-216.55 967.94,-203.22 933.44,-180 909.13,-163.64 888.94,-136.61 876.19,-116.62\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"879.1,-114.67 870.87,-108 873.14,-118.35 879.1,-114.67\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_csv_build_result -->\n",
"<g id=\"edge33\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_csv_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1048.84,-219.44C1005.53,-209.31 945.61,-194.73 893.44,-180 815.2,-157.91 725.85,-128.69 669.8,-109.87\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"670.66,-106.47 660.07,-106.59 668.43,-113.1 670.66,-106.47\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge38\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1116.39,-215.78C1126.67,-195.48 1146.49,-162.21 1173.44,-144 1202.8,-124.16 1239.02,-111.75 1273.02,-103.98\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1273.98,-107.36 1283.01,-101.82 1272.5,-100.51 1273.98,-107.36\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_sql_build_result -->\n",
"<g id=\"edge43\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_sql_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1168.31,-229.15C1288.07,-221.05 1548.7,-201.22 1583.44,-180 1606.93,-165.65 1623.02,-138.01 1632.39,-117.32\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1635.62,-118.66 1636.34,-108.09 1629.19,-115.91 1635.62,-118.66\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge50\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1168.09,-230.44C1263.14,-225.53 1454.47,-212.26 1613.44,-180 1698.62,-162.71 1794.51,-131.05 1852.42,-110.52\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1853.84,-113.73 1862.08,-107.07 1851.49,-107.14 1853.84,-113.73\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_xml_build_result -->\n",
"<g id=\"edge57\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_xml_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1168.23,-229.48C1274.61,-222.8 1503.62,-206.53 1695.44,-180 1838.66,-160.19 2004.28,-126.83 2097.56,-107.06\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2098.41,-110.46 2107.47,-104.96 2096.96,-103.62 2098.41,-110.46\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_pickle_build_result -->\n",
"<g id=\"edge62\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_pickle_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1167.94,-229.06C1314.28,-219.27 1688.84,-193.68 1813.44,-180 2001.52,-159.35 2220.56,-125.22 2339.85,-105.77\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"2340.72,-109.18 2350.02,-104.11 2339.59,-102.27 2340.72,-109.18\"/>\n",
"</g>\n",
"<!-- spend_mean&#45;&gt;spend_zero_mean -->\n",
"<g id=\"edge55\" class=\"edge\">\n",
"<title>spend_mean&#45;&gt;spend_zero_mean</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1468.44,-287.7C1468.44,-279.98 1468.44,-270.71 1468.44,-262.11\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1471.94,-262.1 1468.44,-252.1 1464.94,-262.1 1471.94,-262.1\"/>\n",
"</g>\n",
"<!-- spend_std_dev&#45;&gt;spend_zero_mean_unit_variance -->\n",
"<g id=\"edge31\" class=\"edge\">\n",
"<title>spend_std_dev&#45;&gt;spend_zero_mean_unit_variance</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1312.65,-288.07C1323.64,-268.96 1343.44,-237.69 1366.44,-216 1378.91,-204.24 1394.37,-193.68 1408.68,-185.14\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1410.5,-188.13 1417.4,-180.09 1406.99,-182.07 1410.5,-188.13\"/>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x7f2ab46e3d60>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"materializers = [\n",
" # materialize the dataframe to a pickle file\n",
" to.pickle(\n",
" dependencies=output_columns,\n",
" id=\"df_to_pickle\",\n",
" path=\"./df.pkl\",\n",
" combine=df_builder,\n",
" ),\n",
" # materialize the dataframe to a JSON file\n",
" to.json(\n",
" dependencies=output_columns,\n",
" id=\"df_to_json\",\n",
" filepath_or_buffer=\"./df.json\",\n",
" combine=df_builder,\n",
" ),\n",
" to.sql(\n",
" dependencies=output_columns,\n",
" id=\"df_to_sql\",\n",
" table_name=\"test\",\n",
" db_connection=conn,\n",
" combine=df_builder,\n",
" ),\n",
" # materialize the dataframe to a XML file\n",
" to.xml(\n",
" dependencies=output_columns,\n",
" id=\"df_to_xml\",\n",
" path_or_buffer=\"./df.xml\",\n",
" combine=df_builder,\n",
" ),\n",
" to.html(\n",
" dependencies=output_columns,\n",
" id=\"df_to_html\",\n",
" buf=\"./df.html\",\n",
" combine=df_builder,\n",
" ),\n",
" to.stata(\n",
" dependencies=output_columns,\n",
" id=\"df_to_stata\",\n",
" path=\"./df.dta\",\n",
" combine=df_builder,\n",
" ),\n",
" to.feather(\n",
" dependencies=output_columns,\n",
" id=\"df_to_feather\",\n",
" path=\"./df.feather\",\n",
" combine=df_builder,\n",
" ),\n",
" to.parquet(\n",
" dependencies=output_columns,\n",
" id=\"df_to_parquet\",\n",
" path=\"./df.parquet.gzip\",\n",
" combine=df_builder,\n",
" ),\n",
" to.csv(\n",
" dependencies=output_columns,\n",
" id=\"df_to_csv\",\n",
" path=\"./df.csv\",\n",
" combine=df_builder,\n",
" ),\n",
" to.orc(\n",
" dependencies=output_columns,\n",
" id=\"df_to_orc\",\n",
" path=\"./df.orc\",\n",
" combine=df_builder,\n",
" ),\n",
"]\n",
"# Visualize what is happening\n",
"dr.visualize_materialization(\n",
" *materializers,\n",
" additional_vars=output_columns,\n",
" inputs=initial_columns,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.026203Z",
"start_time": "2023-09-17T05:43:34.016610Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"More than one applicable adapter detected for <class 'pandas.core.frame.DataFrame'>. Using the last one registered <class 'hamilton.plugins.pandas_extensions.PandasPickleWriter'>.\n"
]
}
],
"source": [
"# Materialize a result, i.e. execute the DAG!\n",
"materialization_results, additional_outputs = dr.materialize(\n",
" *materializers,\n",
" additional_vars=[\n",
" \"df_to_pickle_build_result\",\n",
" \"df_to_json_build_result\",\n",
" \"df_to_sql_build_result\",\n",
" \"df_to_xml_build_result\",\n",
" \"df_to_html_build_result\",\n",
" \"df_to_stata_build_result\",\n",
" \"df_to_feather_build_result\",\n",
" \"df_to_parquet_build_result\",\n",
" \"df_to_csv_build_result\",\n",
" \"df_to_orc_build_result\",\n",
" ], # because combine is used, we can get that result here.\n",
" inputs=initial_columns,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.041466Z",
"start_time": "2023-09-17T05:43:34.028346Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'df_to_pickle': {'size': 1088,\n",
" 'path': './df.pkl',\n",
" 'last_modified': 1698716078.9096448,\n",
" 'timestamp': 1698741278.914183},\n",
" 'df_to_json': {'size': 428,\n",
" 'path': './df.json',\n",
" 'last_modified': 1698716078.9096448,\n",
" 'timestamp': 1698741278.916944},\n",
" 'df_to_sql': {'rows': 6,\n",
" 'query': None,\n",
" 'table_name': 'test',\n",
" 'timestamp': 1698741278.927032},\n",
" 'df_to_xml': {'size': 1622,\n",
" 'path': './df.xml',\n",
" 'last_modified': 1698716078.9616463,\n",
" 'timestamp': 1698741278.966294},\n",
" 'df_to_html': {'size': 1145,\n",
" 'path': './df.html',\n",
" 'last_modified': 1698716078.9696465,\n",
" 'timestamp': 1698741278.974224},\n",
" 'df_to_stata': {'size': 1526,\n",
" 'path': './df.dta',\n",
" 'last_modified': 1698716078.985647,\n",
" 'timestamp': 1698741278.992052},\n",
" 'df_to_feather': {'size': 3522,\n",
" 'path': './df.feather',\n",
" 'last_modified': 1698716079.021648,\n",
" 'timestamp': 1698741279.028492},\n",
" 'df_to_parquet': {'size': 4705,\n",
" 'path': './df.parquet.gzip',\n",
" 'last_modified': 1698716079.0456486,\n",
" 'timestamp': 1698741279.053797},\n",
" 'df_to_csv': {'size': 335,\n",
" 'path': './df.csv',\n",
" 'last_modified': 1698716079.0536487,\n",
" 'timestamp': 1698741279.059399},\n",
" 'df_to_orc': {'size': 954,\n",
" 'path': './df.orc',\n",
" 'last_modified': 1698716079.065649,\n",
" 'timestamp': 1698741279.07081}}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"materialization_results"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.051151Z",
"start_time": "2023-09-17T05:43:34.043320Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>spend</th>\n",
" <th>signups</th>\n",
" <th>avg_3wk_spend</th>\n",
" <th>spend_per_signup</th>\n",
" <th>spend_zero_mean_unit_variance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10.000</td>\n",
" <td>-1.064405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" <td>1.000</td>\n",
" <td>-1.064405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20</td>\n",
" <td>50</td>\n",
" <td>13.333333</td>\n",
" <td>0.400</td>\n",
" <td>-0.483821</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>40</td>\n",
" <td>100</td>\n",
" <td>23.333333</td>\n",
" <td>0.400</td>\n",
" <td>0.677349</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>40</td>\n",
" <td>200</td>\n",
" <td>33.333333</td>\n",
" <td>0.200</td>\n",
" <td>0.677349</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>50</td>\n",
" <td>400</td>\n",
" <td>43.333333</td>\n",
" <td>0.125</td>\n",
" <td>1.257934</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"additional_outputs[\"df_to_pickle_build_result\"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.058608Z",
"start_time": "2023-09-17T05:43:34.048662Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>spend</th>\n",
" <th>signups</th>\n",
" <th>avg_3wk_spend</th>\n",
" <th>spend_per_signup</th>\n",
" <th>spend_zero_mean_unit_variance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>10.000</td>\n",
" <td>-1.064405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" <td>1.000</td>\n",
" <td>-1.064405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>20</td>\n",
" <td>50</td>\n",
" <td>13.333333</td>\n",
" <td>0.400</td>\n",
" <td>-0.483821</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>40</td>\n",
" <td>100</td>\n",
" <td>23.333333</td>\n",
" <td>0.400</td>\n",
" <td>0.677349</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>40</td>\n",
" <td>200</td>\n",
" <td>33.333333</td>\n",
" <td>0.200</td>\n",
" <td>0.677349</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>50</td>\n",
" <td>400</td>\n",
" <td>43.333333</td>\n",
" <td>0.125</td>\n",
" <td>1.257934</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"additional_outputs[\"df_to_json_build_result\"]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.089706Z",
"start_time": "2023-09-17T05:43:34.060251Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_sql_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_xml_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_html_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_stata_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_feather_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_parquet_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_csv_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" spend signups avg_3wk_spend spend_per_signup \\\n",
"0 10 1 NaN 10.000 \n",
"1 10 10 NaN 1.000 \n",
"2 20 50 13.333333 0.400 \n",
"3 40 100 23.333333 0.400 \n",
"4 40 200 33.333333 0.200 \n",
"5 50 400 43.333333 0.125 \n",
"\n",
" spend_zero_mean_unit_variance \n",
"0 -1.064405 \n",
"1 -1.064405 \n",
"2 -0.483821 \n",
"3 0.677349 \n",
"4 0.677349 \n",
"5 1.257934 \n"
]
}
],
"source": [
"print(additional_outputs[\"df_to_orc_build_result\"])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-17T05:43:34.090208Z",
"start_time": "2023-09-17T05:43:34.066483Z"
}
},
"outputs": [],
"source": [
"# closing out db connection\n",
"conn.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}