| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# update the pandas package - optional\n", |
| "# !pip install --upgrade pandas\n" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.890006Z", |
| "start_time": "2023-09-17T05:43:33.866292Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "import sqlite3\n", |
| "import sys\n", |
| "import pandas as pd\n", |
| "\n", |
| "# Add the hamilton module to your path - optional\n", |
| "# project_dir = \"### ADD PATH HERE ###\"\n", |
| "# sys.path.append(project_dir)\n", |
| "\n", |
| "from hamilton import base, driver\n", |
| "from hamilton.io.materialization import to" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.890336Z", |
| "start_time": "2023-09-17T05:43:33.869093Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# We use the autoreload extension that comes with ipython to automatically reload modules when\n", |
| "# the code in them changes.\n", |
| "\n", |
| "# import the jupyter extension\n", |
| "%load_ext autoreload\n", |
| "# mode 1: only reload the modules registered with %aimport\n", |
| "%autoreload 1" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.930823Z", |
| "start_time": "2023-09-17T05:43:33.875941Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Overwriting spend_calculations.py\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%%writefile spend_calculations.py\n", |
| "# Define your new Hamilton functions.\n", |
| "# The %%writefile magic command creates a new Python module with the functions below.\n", |
| "# We will import this later and pass it into our Driver.\n", |
| "\n", |
| "import pandas as pd\n", |
| " \n", |
| "# Look at `my_functions` to see how these functions connect.\n", |
| "def avg_3wk_spend(spend: pd.Series) -> pd.Series:\n", |
| " \"\"\"Rolling 3 week average spend.\"\"\"\n", |
| " return spend.rolling(3).mean()\n", |
| "\n", |
| "\n", |
| "def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:\n", |
| " \"\"\"The cost per signup in relation to spend.\"\"\"\n", |
| " return spend / signups\n", |
| "\n", |
| "\n", |
| "def spend_mean(spend: pd.Series) -> float:\n", |
| " \"\"\"Shows function creating a scalar. In this case it computes the mean of the entire column.\"\"\"\n", |
| " return spend.mean()\n", |
| "\n", |
| "\n", |
| "def spend_zero_mean(spend: pd.Series, spend_mean: float) -> pd.Series:\n", |
| " \"\"\"Shows function that takes a scalar. In this case to zero mean spend.\"\"\"\n", |
| " return spend - spend_mean\n", |
| "\n", |
| "\n", |
| "def spend_std_dev(spend: pd.Series) -> float:\n", |
| " \"\"\"Function that computes the standard deviation of the spend column.\"\"\"\n", |
| " return spend.std()\n", |
| "\n", |
| "\n", |
| "def spend_zero_mean_unit_variance(spend_zero_mean: pd.Series, spend_std_dev: float) -> pd.Series:\n", |
| " \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n", |
| " return spend_zero_mean / spend_std_dev" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.931096Z", |
| "start_time": "2023-09-17T05:43:33.881858Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "initial_columns = { # load from actuals or wherever -- this is our initial data we use as input.\n", |
| " # Note: these values don't have to be all series, they could be a scalar.\n", |
| " \"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", |
| " \"spend\": pd.Series([10, 10, 20, 40, 40, 50]),\n", |
| "}" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.932468Z", |
| "start_time": "2023-09-17T05:43:33.887774Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%aimport spend_calculations\n", |
| "\n", |
| "df_builder = base.PandasDataFrameResult()\n", |
| "dr = driver.Driver({}, spend_calculations) # can pass in multiple modules" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.932900Z", |
| "start_time": "2023-09-17T05:43:33.892681Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# we need to specify what we want in the final dataframe. These can be string names, or function references.\n", |
| "output_columns = [\n", |
| " \"spend\",\n", |
| " \"signups\",\n", |
| " \"avg_3wk_spend\",\n", |
| " \"spend_per_signup\",\n", |
| " \"spend_zero_mean_unit_variance\",\n", |
| "]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:33.933121Z", |
| "start_time": "2023-09-17T05:43:33.896779Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# set up db connection for sql materializer below\n", |
| "conn = sqlite3.connect(\"df.db\")\n", |
| "\n", |
| "# remove any previous instances of the 'test' table that will be created next\n", |
| "conn.cursor().execute(\"DROP TABLE IF EXISTS test;\")\n", |
| "conn.commit()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 9, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.014270Z", |
| "start_time": "2023-09-17T05:43:33.912979Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "More than one applicable adapter detected for <class 'pandas.core.frame.DataFrame'>. Using the last one registered <class 'hamilton.plugins.pandas_extensions.PandasPickleWriter'>.\n" |
| ] |
| }, |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 2.43.0 (0)\n", |
| " -->\n", |
| "<!-- Title: %3 Pages: 1 -->\n", |
| "<svg width=\"2561pt\" height=\"404pt\"\n", |
| " viewBox=\"0.00 0.00 2561.23 404.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 400)\">\n", |
| "<title>%3</title>\n", |
| "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-400 2557.23,-400 2557.23,4 -4,4\"/>\n", |
| "<!-- df_to_orc_build_result -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>df_to_orc_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"112.44\" cy=\"-90\" rx=\"112.38\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"112.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_orc_build_result</text>\n", |
| "</g>\n", |
| "<!-- df_to_orc -->\n", |
| "<g id=\"node19\" class=\"node\">\n", |
| "<title>df_to_orc</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"152.94,-36 71.94,-36 71.94,0 152.94,0 152.94,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"112.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_orc</text>\n", |
| "</g>\n", |
| "<!-- df_to_orc_build_result->df_to_orc -->\n", |
| "<g id=\"edge48\" class=\"edge\">\n", |
| "<title>df_to_orc_build_result->df_to_orc</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M112.44,-71.7C112.44,-63.98 112.44,-54.71 112.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"115.94,-46.1 112.44,-36.1 108.94,-46.1 115.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- spend_per_signup -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>spend_per_signup</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1087.94,-180 942.94,-180 942.94,-144 1087.94,-144 1087.94,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1015.44\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_per_signup</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_orc_build_result -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_orc_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M942.79,-157.82C804.46,-151.39 493.81,-135.08 233.44,-108 222.94,-106.91 211.95,-105.6 201.06,-104.2\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"201.47,-100.72 191.1,-102.89 200.55,-107.66 201.47,-100.72\"/>\n", |
| "</g>\n", |
| "<!-- df_to_html_build_result -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>df_to_html_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1114.44\" cy=\"-90\" rx=\"118.88\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1114.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_html_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_html_build_result -->\n", |
| "<g id=\"edge11\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_html_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1039.66,-143.88C1052.55,-134.76 1068.58,-123.43 1082.48,-113.6\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1084.67,-116.34 1090.81,-107.71 1080.63,-110.62 1084.67,-116.34\"/>\n", |
| "</g>\n", |
| "<!-- df_to_stata_build_result -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>df_to_stata_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"363.44\" cy=\"-90\" rx=\"120.78\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"363.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_stata_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_stata_build_result -->\n", |
| "<g id=\"edge19\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_stata_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M942.74,-154.29C841.82,-144.81 653.59,-126.6 493.44,-108 482.51,-106.73 471.07,-105.33 459.71,-103.91\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"460.12,-100.43 449.76,-102.64 459.24,-107.37 460.12,-100.43\"/>\n", |
| "</g>\n", |
| "<!-- df_to_json_build_result -->\n", |
| "<g id=\"node13\" class=\"node\">\n", |
| "<title>df_to_json_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"861.44\" cy=\"-90\" rx=\"116.18\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"861.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_json_build_result -->\n", |
| "<g id=\"edge28\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M977.77,-143.88C956.26,-134.1 929.16,-121.78 906.53,-111.5\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"907.69,-108.18 897.14,-107.23 904.79,-114.55 907.69,-108.18\"/>\n", |
| "</g>\n", |
| "<!-- df_to_csv_build_result -->\n", |
| "<g id=\"node15\" class=\"node\">\n", |
| "<title>df_to_csv_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"614.44\" cy=\"-90\" rx=\"112.38\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"614.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_csv_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_csv_build_result -->\n", |
| "<g id=\"edge35\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_csv_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M942.82,-148.32C873.74,-136.26 769.88,-118.13 697.53,-105.5\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"697.99,-102.03 687.53,-103.76 696.78,-108.93 697.99,-102.03\"/>\n", |
| "</g>\n", |
| "<!-- df_to_feather_build_result -->\n", |
| "<g id=\"node16\" class=\"node\">\n", |
| "<title>df_to_feather_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1382.44\" cy=\"-90\" rx=\"131.08\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1382.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_feather_build_result -->\n", |
| "<g id=\"edge40\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1087.95,-147.17C1148.72,-135.58 1235.52,-119.02 1299.14,-106.89\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1299.97,-110.29 1309.14,-104.98 1298.66,-103.42 1299.97,-110.29\"/>\n", |
| "</g>\n", |
| "<!-- df_to_sql_build_result -->\n", |
| "<g id=\"node17\" class=\"node\">\n", |
| "<title>df_to_sql_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1642.44\" cy=\"-90\" rx=\"111.28\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1642.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_sql_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_sql_build_result -->\n", |
| "<g id=\"edge45\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_sql_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1088.19,-153.1C1114.99,-150.21 1145.59,-146.93 1173.44,-144 1328.52,-127.66 1367.66,-126.94 1522.44,-108 1532.31,-106.79 1542.62,-105.45 1552.88,-104.07\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1553.67,-107.49 1563.1,-102.67 1552.72,-100.56 1553.67,-107.49\"/>\n", |
| "</g>\n", |
| "<!-- df_to_parquet_build_result -->\n", |
| "<g id=\"node20\" class=\"node\">\n", |
| "<title>df_to_parquet_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1905.44\" cy=\"-90\" rx=\"133.78\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1905.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_parquet_build_result -->\n", |
| "<g id=\"edge52\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1088.13,-152.47C1114.92,-149.51 1145.54,-146.35 1173.44,-144 1434.78,-121.97 1501.23,-131.49 1762.44,-108 1775.14,-106.86 1788.45,-105.5 1801.64,-104.05\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1802.13,-107.51 1811.68,-102.93 1801.35,-100.56 1802.13,-107.51\"/>\n", |
| "</g>\n", |
| "<!-- df_to_xml_build_result -->\n", |
| "<g id=\"node22\" class=\"node\">\n", |
| "<title>df_to_xml_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"2171.44\" cy=\"-90\" rx=\"114.28\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"2171.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_xml_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_xml_build_result -->\n", |
| "<g id=\"edge59\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_xml_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1088.11,-152.16C1114.9,-149.18 1145.51,-146.07 1173.44,-144 1561.59,-115.17 1660.77,-142.68 2048.44,-108 2059.48,-107.01 2071.05,-105.73 2082.5,-104.32\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2083.02,-107.79 2092.5,-103.05 2082.14,-100.84 2083.02,-107.79\"/>\n", |
| "</g>\n", |
| "<!-- df_to_pickle_build_result -->\n", |
| "<g id=\"node23\" class=\"node\">\n", |
| "<title>df_to_pickle_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"2428.44\" cy=\"-90\" rx=\"124.58\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"2428.44\" y=\"-86.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_pickle_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_pickle_build_result -->\n", |
| "<g id=\"edge64\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_pickle_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1088.1,-152.03C1114.88,-149.02 1145.51,-145.95 1173.44,-144 1670.71,-109.31 1797.52,-147.41 2294.44,-108 2306.71,-107.03 2319.57,-105.74 2332.28,-104.31\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2332.89,-107.76 2342.42,-103.13 2332.09,-100.8 2332.89,-107.76\"/>\n", |
| "</g>\n", |
| "<!-- spend -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>spend</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1356.94,-396 1249.94,-396 1249.94,-360 1356.94,-360 1356.94,-396\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1303.44\" y=\"-374.3\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend</text>\n", |
| "</g>\n", |
| "<!-- spend->df_to_orc_build_result -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>spend->df_to_orc_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1249.91,-375.91C1153.24,-373 943.56,-362.41 771.44,-324 533.41,-270.88 262.96,-157.56 157.7,-111.3\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"158.89,-108 148.33,-107.17 156.06,-114.41 158.89,-108\"/>\n", |
| "</g>\n", |
| "<!-- spend->spend_per_signup -->\n", |
| "<g id=\"edge6\" class=\"edge\">\n", |
| "<title>spend->spend_per_signup</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1249.86,-365.81C1189.83,-350.48 1093.82,-317.09 1039.44,-252 1024.96,-234.67 1019.15,-209.41 1016.86,-190.22\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1020.31,-189.63 1015.89,-180.01 1013.35,-190.29 1020.31,-189.63\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_html_build_result -->\n", |
| "<g id=\"edge8\" class=\"edge\">\n", |
| "<title>spend->df_to_html_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1249.5,-372.88C1201.49,-367.3 1130.68,-354.28 1076.44,-324 997.68,-280.04 967.81,-263.39 933.44,-180 927.34,-165.21 923.85,-156.81 933.44,-144 945.5,-127.9 987.68,-115.05 1028.29,-106.03\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1029.24,-109.41 1038.28,-103.88 1027.77,-102.56 1029.24,-109.41\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_stata_build_result -->\n", |
| "<g id=\"edge16\" class=\"edge\">\n", |
| "<title>spend->df_to_stata_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1249.81,-375.35C1168.66,-371.73 1010.47,-360.24 881.44,-324 692.48,-270.93 484.22,-159.53 400.9,-112.59\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"402.49,-109.47 392.06,-107.59 399.04,-115.56 402.49,-109.47\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend -->\n", |
| "<g id=\"node10\" class=\"node\">\n", |
| "<title>avg_3wk_spend</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1310.44,-180 1182.44,-180 1182.44,-144 1310.44,-144 1310.44,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1246.44\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">avg_3wk_spend</text>\n", |
| "</g>\n", |
| "<!-- spend->avg_3wk_spend -->\n", |
| "<g id=\"edge22\" class=\"edge\">\n", |
| "<title>spend->avg_3wk_spend</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1256.4,-359.97C1240.49,-351.7 1224.48,-339.99 1215.44,-324 1191.22,-281.14 1213.87,-222.72 1231.12,-189.32\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1234.37,-190.66 1236.02,-180.2 1228.2,-187.35 1234.37,-190.66\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_json_build_result -->\n", |
| "<g id=\"edge25\" class=\"edge\">\n", |
| "<title>spend->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1249.82,-377.38C1188.67,-375.6 1087.47,-365.8 1012.44,-324 939.91,-283.59 932.83,-253.09 893.44,-180 882.71,-160.09 874.26,-135.84 868.75,-117.69\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"872.09,-116.65 865.92,-108.04 865.37,-118.62 872.09,-116.65\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_csv_build_result -->\n", |
| "<g id=\"edge32\" class=\"edge\">\n", |
| "<title>spend->df_to_csv_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1249.84,-376.01C1180.35,-373.13 1056.46,-362.46 958.44,-324 826.87,-272.38 695.18,-163.28 640.63,-114.93\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"642.64,-112.03 632.85,-107.98 637.98,-117.25 642.64,-112.03\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_feather_build_result -->\n", |
| "<g id=\"edge37\" class=\"edge\">\n", |
| "<title>spend->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1357.12,-373.16C1424.59,-367.16 1534.94,-353.11 1562.44,-324 1617.75,-265.45 1632.6,-207.8 1583.44,-144 1570.16,-126.76 1524.76,-114 1480.47,-105.32\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1480.93,-101.85 1470.45,-103.42 1479.63,-108.73 1480.93,-101.85\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_sql_build_result -->\n", |
| "<g id=\"edge42\" class=\"edge\">\n", |
| "<title>spend->df_to_sql_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1357.18,-372.9C1429.91,-366.39 1554.71,-351.51 1590.44,-324 1653.47,-275.48 1651.22,-168.76 1646.14,-118.37\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1649.6,-117.78 1645.01,-108.23 1642.64,-118.56 1649.6,-117.78\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_parquet_build_result -->\n", |
| "<g id=\"edge49\" class=\"edge\">\n", |
| "<title>spend->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1357.11,-376.22C1421.81,-373.45 1532.52,-362.81 1618.44,-324 1732.57,-272.44 1839.46,-164.1 1883.84,-115.5\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1886.55,-117.73 1890.66,-107.96 1881.36,-113.03 1886.55,-117.73\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean -->\n", |
| "<g id=\"node21\" class=\"node\">\n", |
| "<title>spend_zero_mean</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1468.44\" cy=\"-234\" rx=\"92.88\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1468.44\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_zero_mean -->\n", |
| "<g id=\"edge54\" class=\"edge\">\n", |
| "<title>spend->spend_zero_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1357.02,-372.93C1420.98,-366.84 1522.25,-352.82 1545.44,-324 1555.47,-311.53 1552.88,-302.16 1545.44,-288 1538.45,-274.69 1526.43,-264.08 1513.95,-255.95\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1515.65,-252.89 1505.29,-250.71 1512.03,-258.88 1515.65,-252.89\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_xml_build_result -->\n", |
| "<g id=\"edge56\" class=\"edge\">\n", |
| "<title>spend->df_to_xml_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1357.23,-373.85C1428.5,-368.61 1557.59,-355.37 1663.44,-324 1848.19,-269.25 2052.02,-159.22 2134.14,-112.63\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2136.12,-115.53 2143.08,-107.54 2132.66,-109.45 2136.12,-115.53\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_pickle_build_result -->\n", |
| "<g id=\"edge61\" class=\"edge\">\n", |
| "<title>spend->df_to_pickle_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1357.16,-373.74C1438.52,-368.03 1597.51,-353.88 1729.44,-324 1979.77,-267.29 2267.11,-156.34 2379.65,-111.01\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2381.08,-114.21 2389.04,-107.22 2378.46,-107.72 2381.08,-114.21\"/>\n", |
| "</g>\n", |
| "<!-- spend_mean -->\n", |
| "<g id=\"node26\" class=\"node\">\n", |
| "<title>spend_mean</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1468.44\" cy=\"-306\" rx=\"68.49\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1468.44\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_mean</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_mean -->\n", |
| "<g id=\"edge67\" class=\"edge\">\n", |
| "<title>spend->spend_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1343.8,-359.88C1368.34,-349.47 1399.67,-336.18 1424.78,-325.52\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1426.23,-328.71 1434.07,-321.58 1423.5,-322.27 1426.23,-328.71\"/>\n", |
| "</g>\n", |
| "<!-- spend_std_dev -->\n", |
| "<g id=\"node27\" class=\"node\">\n", |
| "<title>spend_std_dev</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"1303.44\" cy=\"-306\" rx=\"78.79\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1303.44\" y=\"-302.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_std_dev</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_std_dev -->\n", |
| "<g id=\"edge68\" class=\"edge\">\n", |
| "<title>spend->spend_std_dev</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1303.44,-359.7C1303.44,-351.98 1303.44,-342.71 1303.44,-334.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1306.94,-334.1 1303.44,-324.1 1299.94,-334.1 1306.94,-334.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_html -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>df_to_html</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1160.44,-36 1068.44,-36 1068.44,0 1160.44,0 1160.44,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1114.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_html</text>\n", |
| "</g>\n", |
| "<!-- df_to_html_build_result->df_to_html -->\n", |
| "<g id=\"edge14\" class=\"edge\">\n", |
| "<title>df_to_html_build_result->df_to_html</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1114.44,-71.7C1114.44,-63.98 1114.44,-54.71 1114.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1117.94,-46.1 1114.44,-36.1 1110.94,-46.1 1117.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_sql -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>df_to_sql</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1682.44,-36 1602.44,-36 1602.44,0 1682.44,0 1682.44,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1642.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_sql</text>\n", |
| "</g>\n", |
| "<!-- df_to_feather -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>df_to_feather</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1437.44,-36 1327.44,-36 1327.44,0 1437.44,0 1437.44,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1382.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather</text>\n", |
| "</g>\n", |
| "<!-- df_to_stata -->\n", |
| "<g id=\"node25\" class=\"node\">\n", |
| "<title>df_to_stata</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"410.94,-36 315.94,-36 315.94,0 410.94,0 410.94,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"363.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_stata</text>\n", |
| "</g>\n", |
| "<!-- df_to_stata_build_result->df_to_stata -->\n", |
| "<g id=\"edge66\" class=\"edge\">\n", |
| "<title>df_to_stata_build_result->df_to_stata</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M363.44,-71.7C363.44,-63.98 363.44,-54.71 363.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"366.94,-46.1 363.44,-36.1 359.94,-46.1 366.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_csv -->\n", |
| "<g id=\"node9\" class=\"node\">\n", |
| "<title>df_to_csv</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"655.44,-36 573.44,-36 573.44,0 655.44,0 655.44,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"614.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_csv</text>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_orc_build_result -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_orc_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1182.3,-152.71C1155.77,-149.55 1124.66,-146.18 1096.44,-144 713.7,-114.37 615.75,-142.75 233.44,-108 222.68,-107.02 211.42,-105.76 200.28,-104.37\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"200.46,-100.86 190.09,-103.06 199.56,-107.81 200.46,-100.86\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_html_build_result -->\n", |
| "<g id=\"edge10\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_html_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1214.15,-143.88C1196.18,-134.35 1173.65,-122.4 1154.55,-112.27\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1155.96,-109.06 1145.49,-107.47 1152.68,-115.24 1155.96,-109.06\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_stata_build_result -->\n", |
| "<g id=\"edge18\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_stata_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1182.28,-152.97C1155.74,-149.84 1124.64,-146.44 1096.44,-144 828.96,-120.87 760.71,-133.42 493.44,-108 481.93,-106.91 469.88,-105.58 457.94,-104.15\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"458.31,-100.67 447.96,-102.93 457.46,-107.62 458.31,-100.67\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_json_build_result -->\n", |
| "<g id=\"edge27\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1182.43,-149.36C1116.98,-137.46 1015.06,-118.93 943.67,-105.95\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"944.27,-102.5 933.8,-104.16 943.02,-109.39 944.27,-102.5\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_csv_build_result -->\n", |
| "<g id=\"edge34\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_csv_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1182.23,-153.53C1155.68,-150.5 1124.59,-147 1096.44,-144 936.55,-126.95 896.1,-127.1 736.44,-108 726.22,-106.78 715.53,-105.41 704.92,-104.01\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"705.15,-100.51 694.77,-102.64 704.22,-107.44 705.15,-100.51\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_feather_build_result -->\n", |
| "<g id=\"edge39\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1279.71,-143.88C1298.31,-134.3 1321.65,-122.29 1341.38,-112.13\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1343.16,-115.15 1350.45,-107.47 1339.96,-108.93 1343.16,-115.15\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_sql_build_result -->\n", |
| "<g id=\"edge44\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_sql_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1310.48,-145.83C1313.51,-145.2 1316.5,-144.58 1319.44,-144 1397.75,-128.47 1487.38,-114.03 1552.33,-104.15\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1553.17,-107.56 1562.53,-102.61 1552.12,-100.64 1553.17,-107.56\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_parquet_build_result -->\n", |
| "<g id=\"edge51\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1310.83,-145.46C1313.73,-144.93 1316.61,-144.44 1319.44,-144 1514.61,-113.52 1565.88,-127.66 1762.44,-108 1774.81,-106.76 1787.76,-105.37 1800.61,-103.92\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1801.34,-107.36 1810.88,-102.74 1800.55,-100.4 1801.34,-107.36\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_xml_build_result -->\n", |
| "<g id=\"edge58\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_xml_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1310.81,-145.35C1313.72,-144.85 1316.6,-144.4 1319.44,-144 1640.7,-99.03 1725.46,-138.24 2048.44,-108 2059.4,-106.97 2070.87,-105.68 2082.23,-104.27\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2082.68,-107.74 2092.16,-103 2081.8,-100.8 2082.68,-107.74\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_pickle_build_result -->\n", |
| "<g id=\"edge63\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_pickle_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1310.8,-145.31C1313.71,-144.82 1316.6,-144.38 1319.44,-144 1749.25,-86.56 1862.25,-143.32 2294.44,-108 2306.7,-107 2319.57,-105.69 2332.28,-104.25\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2332.89,-107.7 2342.42,-103.07 2332.08,-100.75 2332.89,-107.7\"/>\n", |
| "</g>\n", |
| "<!-- df_to_json -->\n", |
| "<g id=\"node11\" class=\"node\">\n", |
| "<title>df_to_json</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"905.44,-36 817.44,-36 817.44,0 905.44,0 905.44,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"861.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json</text>\n", |
| "</g>\n", |
| "<!-- df_to_parquet -->\n", |
| "<g id=\"node12\" class=\"node\">\n", |
| "<title>df_to_parquet</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1962.94,-36 1847.94,-36 1847.94,0 1962.94,0 1962.94,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1905.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet</text>\n", |
| "</g>\n", |
| "<!-- df_to_json_build_result->df_to_json -->\n", |
| "<g id=\"edge23\" class=\"edge\">\n", |
| "<title>df_to_json_build_result->df_to_json</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M861.44,-71.7C861.44,-63.98 861.44,-54.71 861.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"864.94,-46.1 861.44,-36.1 857.94,-46.1 864.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance -->\n", |
| "<g id=\"node14\" class=\"node\">\n", |
| "<title>spend_zero_mean_unit_variance</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"1573.94,-180 1328.94,-180 1328.94,-144 1573.94,-144 1573.94,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1451.44\" y=\"-158.3\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean_unit_variance</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_orc_build_result -->\n", |
| "<g id=\"edge5\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_orc_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1328.87,-144.81C1325.7,-144.52 1322.55,-144.25 1319.44,-144 838.08,-105.11 714.57,-149.65 233.44,-108 222.6,-107.06 211.24,-105.82 200.02,-104.43\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"200.12,-100.91 189.76,-103.11 199.23,-107.85 200.12,-100.91\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_html_build_result -->\n", |
| "<g id=\"edge12\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_html_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1369.43,-143.97C1315.25,-132.71 1244.71,-118.06 1191.56,-107.02\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1192.01,-103.54 1181.51,-104.93 1190.58,-110.39 1192.01,-103.54\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_stata_build_result -->\n", |
| "<g id=\"edge20\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_stata_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1328.87,-144.86C1325.69,-144.55 1322.55,-144.27 1319.44,-144 953.33,-112.5 859.52,-139.85 493.44,-108 481.75,-106.98 469.49,-105.69 457.37,-104.26\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"457.59,-100.76 447.24,-103.04 456.75,-107.71 457.59,-100.76\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_json_build_result -->\n", |
| "<g id=\"edge29\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1328.84,-145.12C1325.68,-144.74 1322.54,-144.37 1319.44,-144 1171.61,-126.49 1134.21,-126.03 986.44,-108 976.23,-106.75 965.55,-105.39 954.93,-104\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"955.15,-100.5 944.77,-102.66 954.23,-107.44 955.15,-100.5\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_csv_build_result -->\n", |
| "<g id=\"edge36\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_csv_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1328.86,-144.93C1325.69,-144.61 1322.55,-144.3 1319.44,-144 1061,-119.41 994.75,-133.93 736.44,-108 725.77,-106.93 714.6,-105.62 703.53,-104.22\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"703.78,-100.72 693.41,-102.91 702.88,-107.67 703.78,-100.72\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_feather_build_result -->\n", |
| "<g id=\"edge41\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1434.38,-143.7C1425.94,-135.14 1415.62,-124.66 1406.4,-115.3\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1408.81,-112.77 1399.3,-108.1 1403.83,-117.68 1408.81,-112.77\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_sql_build_result -->\n", |
| "<g id=\"edge46\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_sql_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1498.16,-143.88C1526.05,-133.66 1561.51,-120.66 1590.3,-110.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1591.6,-113.36 1599.79,-106.63 1589.19,-106.79 1591.6,-113.36\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_parquet_build_result -->\n", |
| "<g id=\"edge53\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1561.92,-143.97C1638.13,-132.22 1738.37,-116.76 1810.89,-105.58\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1811.58,-109.01 1820.93,-104.03 1810.51,-102.1 1811.58,-109.01\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_xml_build_result -->\n", |
| "<g id=\"edge60\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_xml_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1573.96,-151.88C1694.76,-142.48 1884.57,-126.59 2048.44,-108 2058.75,-106.83 2069.53,-105.5 2080.23,-104.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2081,-107.54 2090.46,-102.76 2080.09,-100.6 2081,-107.54\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_pickle_build_result -->\n", |
| "<g id=\"edge65\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_pickle_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1574.14,-155.73C1738.41,-148.11 2038.78,-132.23 2294.44,-108 2306.2,-106.89 2318.53,-105.55 2330.74,-104.13\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2331.43,-107.57 2340.95,-102.91 2330.61,-100.62 2331.43,-107.57\"/>\n", |
| "</g>\n", |
| "<!-- df_to_csv_build_result->df_to_csv -->\n", |
| "<g id=\"edge21\" class=\"edge\">\n", |
| "<title>df_to_csv_build_result->df_to_csv</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M614.44,-71.7C614.44,-63.98 614.44,-54.71 614.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"617.94,-46.1 614.44,-36.1 610.94,-46.1 617.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_feather_build_result->df_to_feather -->\n", |
| "<g id=\"edge15\" class=\"edge\">\n", |
| "<title>df_to_feather_build_result->df_to_feather</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1382.44,-71.7C1382.44,-63.98 1382.44,-54.71 1382.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1385.94,-46.1 1382.44,-36.1 1378.94,-46.1 1385.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_sql_build_result->df_to_sql -->\n", |
| "<g id=\"edge13\" class=\"edge\">\n", |
| "<title>df_to_sql_build_result->df_to_sql</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1642.44,-71.7C1642.44,-63.98 1642.44,-54.71 1642.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1645.94,-46.1 1642.44,-36.1 1638.94,-46.1 1645.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_pickle -->\n", |
| "<g id=\"node18\" class=\"node\">\n", |
| "<title>df_to_pickle</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"2478.94,-36 2377.94,-36 2377.94,0 2478.94,0 2478.94,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"2428.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_pickle</text>\n", |
| "</g>\n", |
| "<!-- df_to_parquet_build_result->df_to_parquet -->\n", |
| "<g id=\"edge24\" class=\"edge\">\n", |
| "<title>df_to_parquet_build_result->df_to_parquet</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1905.44,-71.7C1905.44,-63.98 1905.44,-54.71 1905.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1908.94,-46.1 1905.44,-36.1 1901.94,-46.1 1908.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean->spend_zero_mean_unit_variance -->\n", |
| "<g id=\"edge30\" class=\"edge\">\n", |
| "<title>spend_zero_mean->spend_zero_mean_unit_variance</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1464.24,-215.7C1462.34,-207.9 1460.06,-198.51 1457.96,-189.83\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1461.36,-189 1455.59,-180.1 1454.55,-190.65 1461.36,-189\"/>\n", |
| "</g>\n", |
| "<!-- df_to_xml -->\n", |
| "<g id=\"node28\" class=\"node\">\n", |
| "<title>df_to_xml</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"2213.94,-36 2128.94,-36 2128.94,0 2213.94,0 2213.94,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"2171.44\" y=\"-14.3\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_xml</text>\n", |
| "</g>\n", |
| "<!-- df_to_xml_build_result->df_to_xml -->\n", |
| "<g id=\"edge69\" class=\"edge\">\n", |
| "<title>df_to_xml_build_result->df_to_xml</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M2171.44,-71.7C2171.44,-63.98 2171.44,-54.71 2171.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2174.94,-46.1 2171.44,-36.1 2167.94,-46.1 2174.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_pickle_build_result->df_to_pickle -->\n", |
| "<g id=\"edge47\" class=\"edge\">\n", |
| "<title>df_to_pickle_build_result->df_to_pickle</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M2428.44,-71.7C2428.44,-63.98 2428.44,-54.71 2428.44,-46.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2431.94,-46.1 2428.44,-36.1 2424.94,-46.1 2431.94,-46.1\"/>\n", |
| "</g>\n", |
| "<!-- signups -->\n", |
| "<g id=\"node24\" class=\"node\">\n", |
| "<title>signups</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1167.94,-252 1048.94,-252 1048.94,-216 1167.94,-216 1167.94,-252\"/>\n", |
| "<text text-anchor=\"middle\" x=\"1108.44\" y=\"-230.3\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups</text>\n", |
| "</g>\n", |
| "<!-- signups->df_to_orc_build_result -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>signups->df_to_orc_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1048.94,-228.25C954.03,-220.35 762.64,-203.06 601.44,-180 454.38,-158.96 283.97,-125.97 188.12,-106.62\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"188.72,-103.17 178.22,-104.61 187.33,-110.03 188.72,-103.17\"/>\n", |
| "</g>\n", |
| "<!-- signups->spend_per_signup -->\n", |
| "<g id=\"edge7\" class=\"edge\">\n", |
| "<title>signups->spend_per_signup</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1085.45,-215.7C1073.63,-206.8 1059.05,-195.82 1046.26,-186.2\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1048.26,-183.32 1038.16,-180.1 1044.05,-188.91 1048.26,-183.32\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_html_build_result -->\n", |
| "<g id=\"edge9\" class=\"edge\">\n", |
| "<title>signups->df_to_html_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1109.16,-215.87C1110.19,-191.67 1112.06,-147.21 1113.28,-118.39\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1116.79,-118.33 1113.71,-108.19 1109.79,-118.03 1116.79,-118.33\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_stata_build_result -->\n", |
| "<g id=\"edge17\" class=\"edge\">\n", |
| "<title>signups->df_to_stata_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1048.84,-224.57C981.89,-214.86 870.09,-197.93 774.44,-180 654.92,-157.6 516.95,-126.67 435.13,-107.8\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"435.72,-104.34 425.19,-105.5 434.15,-111.16 435.72,-104.34\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_json_build_result -->\n", |
| "<g id=\"edge26\" class=\"edge\">\n", |
| "<title>signups->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1048.69,-224.04C1012.96,-216.55 967.94,-203.22 933.44,-180 909.13,-163.64 888.94,-136.61 876.19,-116.62\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"879.1,-114.67 870.87,-108 873.14,-118.35 879.1,-114.67\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_csv_build_result -->\n", |
| "<g id=\"edge33\" class=\"edge\">\n", |
| "<title>signups->df_to_csv_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1048.84,-219.44C1005.53,-209.31 945.61,-194.73 893.44,-180 815.2,-157.91 725.85,-128.69 669.8,-109.87\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"670.66,-106.47 660.07,-106.59 668.43,-113.1 670.66,-106.47\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_feather_build_result -->\n", |
| "<g id=\"edge38\" class=\"edge\">\n", |
| "<title>signups->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1116.39,-215.78C1126.67,-195.48 1146.49,-162.21 1173.44,-144 1202.8,-124.16 1239.02,-111.75 1273.02,-103.98\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1273.98,-107.36 1283.01,-101.82 1272.5,-100.51 1273.98,-107.36\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_sql_build_result -->\n", |
| "<g id=\"edge43\" class=\"edge\">\n", |
| "<title>signups->df_to_sql_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1168.31,-229.15C1288.07,-221.05 1548.7,-201.22 1583.44,-180 1606.93,-165.65 1623.02,-138.01 1632.39,-117.32\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1635.62,-118.66 1636.34,-108.09 1629.19,-115.91 1635.62,-118.66\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_parquet_build_result -->\n", |
| "<g id=\"edge50\" class=\"edge\">\n", |
| "<title>signups->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1168.09,-230.44C1263.14,-225.53 1454.47,-212.26 1613.44,-180 1698.62,-162.71 1794.51,-131.05 1852.42,-110.52\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1853.84,-113.73 1862.08,-107.07 1851.49,-107.14 1853.84,-113.73\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_xml_build_result -->\n", |
| "<g id=\"edge57\" class=\"edge\">\n", |
| "<title>signups->df_to_xml_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1168.23,-229.48C1274.61,-222.8 1503.62,-206.53 1695.44,-180 1838.66,-160.19 2004.28,-126.83 2097.56,-107.06\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2098.41,-110.46 2107.47,-104.96 2096.96,-103.62 2098.41,-110.46\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_pickle_build_result -->\n", |
| "<g id=\"edge62\" class=\"edge\">\n", |
| "<title>signups->df_to_pickle_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1167.94,-229.06C1314.28,-219.27 1688.84,-193.68 1813.44,-180 2001.52,-159.35 2220.56,-125.22 2339.85,-105.77\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"2340.72,-109.18 2350.02,-104.11 2339.59,-102.27 2340.72,-109.18\"/>\n", |
| "</g>\n", |
| "<!-- spend_mean->spend_zero_mean -->\n", |
| "<g id=\"edge55\" class=\"edge\">\n", |
| "<title>spend_mean->spend_zero_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1468.44,-287.7C1468.44,-279.98 1468.44,-270.71 1468.44,-262.11\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1471.94,-262.1 1468.44,-252.1 1464.94,-262.1 1471.94,-262.1\"/>\n", |
| "</g>\n", |
| "<!-- spend_std_dev->spend_zero_mean_unit_variance -->\n", |
| "<g id=\"edge31\" class=\"edge\">\n", |
| "<title>spend_std_dev->spend_zero_mean_unit_variance</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1312.65,-288.07C1323.64,-268.96 1343.44,-237.69 1366.44,-216 1378.91,-204.24 1394.37,-193.68 1408.68,-185.14\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1410.5,-188.13 1417.4,-180.09 1406.99,-182.07 1410.5,-188.13\"/>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x7f2ab46e3d60>" |
| ] |
| }, |
| "execution_count": 9, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Build one materializer per output format. Every materializer takes the same\n", |
| "# `output_columns` nodes, combines them with `df_builder`, and hands the result\n", |
| "# to a format-specific writer. `output_columns`, `df_builder`, `conn`, `dr` and\n", |
| "# `initial_columns` are defined outside this cell.\n", |
| "materializers = [\n", |
| " # materialize the dataframe to a pickle file\n", |
| " to.pickle(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_pickle\",\n", |
| " path=\"./df.pkl\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a JSON file\n", |
| " to.json(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_json\",\n", |
| " filepath_or_buffer=\"./df.json\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to the SQL table `test` using the `conn` connection\n", |
| " to.sql(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_sql\",\n", |
| " table_name=\"test\",\n", |
| " db_connection=conn,\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to an XML file\n", |
| " to.xml(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_xml\",\n", |
| " path_or_buffer=\"./df.xml\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to an HTML file\n", |
| " to.html(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_html\",\n", |
| " buf=\"./df.html\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a Stata (.dta) file\n", |
| " to.stata(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_stata\",\n", |
| " path=\"./df.dta\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a Feather file\n", |
| " to.feather(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_feather\",\n", |
| " path=\"./df.feather\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a Parquet file\n", |
| " # NOTE(review): the path ends in .gzip but no compression option is passed here;\n", |
| " # confirm which codec the writer actually uses.\n", |
| " to.parquet(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_parquet\",\n", |
| " path=\"./df.parquet.gzip\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a CSV file\n", |
| " to.csv(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_csv\",\n", |
| " path=\"./df.csv\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to an ORC file\n", |
| " to.orc(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_orc\",\n", |
| " path=\"./df.orc\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| "]\n", |
| "# Visualize what is happening: draw the DAG including the materializer nodes.\n", |
| "# This call is the last expression of the cell, so the returned graph is rendered below.\n", |
| "dr.visualize_materialization(\n", |
| " *materializers,\n", |
| " additional_vars=output_columns,\n", |
| " inputs=initial_columns,\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.026203Z", |
| "start_time": "2023-09-17T05:43:34.016610Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "More than one applicable adapter detected for <class 'pandas.core.frame.DataFrame'>. Using the last one registered <class 'hamilton.plugins.pandas_extensions.PandasPickleWriter'>.\n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Materialize a result, i.e. execute the DAG!\n", |
| "# The call returns a pair: `materialization_results` holds the metadata reported by\n", |
| "# each materializer, and `additional_outputs` holds the values of the nodes listed\n", |
| "# in `additional_vars`.\n", |
| "materialization_results, additional_outputs = dr.materialize(\n", |
| " *materializers,\n", |
| " additional_vars=[\n", |
| " \"df_to_pickle_build_result\",\n", |
| " \"df_to_json_build_result\",\n", |
| " \"df_to_sql_build_result\",\n", |
| " \"df_to_xml_build_result\",\n", |
| " \"df_to_html_build_result\",\n", |
| " \"df_to_stata_build_result\",\n", |
| " \"df_to_feather_build_result\",\n", |
| " \"df_to_parquet_build_result\",\n", |
| " \"df_to_csv_build_result\",\n", |
| " \"df_to_orc_build_result\",\n", |
| " ], # because `combine` is used, each materializer has a `{id}_build_result` node we can request here.\n", |
| " inputs=initial_columns,\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 11, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.041466Z", |
| "start_time": "2023-09-17T05:43:34.028346Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "{'df_to_pickle': {'size': 1088,\n", |
| " 'path': './df.pkl',\n", |
| " 'last_modified': 1698716078.9096448,\n", |
| " 'timestamp': 1698741278.914183},\n", |
| " 'df_to_json': {'size': 428,\n", |
| " 'path': './df.json',\n", |
| " 'last_modified': 1698716078.9096448,\n", |
| " 'timestamp': 1698741278.916944},\n", |
| " 'df_to_sql': {'rows': 6,\n", |
| " 'query': None,\n", |
| " 'table_name': 'test',\n", |
| " 'timestamp': 1698741278.927032},\n", |
| " 'df_to_xml': {'size': 1622,\n", |
| " 'path': './df.xml',\n", |
| " 'last_modified': 1698716078.9616463,\n", |
| " 'timestamp': 1698741278.966294},\n", |
| " 'df_to_html': {'size': 1145,\n", |
| " 'path': './df.html',\n", |
| " 'last_modified': 1698716078.9696465,\n", |
| " 'timestamp': 1698741278.974224},\n", |
| " 'df_to_stata': {'size': 1526,\n", |
| " 'path': './df.dta',\n", |
| " 'last_modified': 1698716078.985647,\n", |
| " 'timestamp': 1698741278.992052},\n", |
| " 'df_to_feather': {'size': 3522,\n", |
| " 'path': './df.feather',\n", |
| " 'last_modified': 1698716079.021648,\n", |
| " 'timestamp': 1698741279.028492},\n", |
| " 'df_to_parquet': {'size': 4705,\n", |
| " 'path': './df.parquet.gzip',\n", |
| " 'last_modified': 1698716079.0456486,\n", |
| " 'timestamp': 1698741279.053797},\n", |
| " 'df_to_csv': {'size': 335,\n", |
| " 'path': './df.csv',\n", |
| " 'last_modified': 1698716079.0536487,\n", |
| " 'timestamp': 1698741279.059399},\n", |
| " 'df_to_orc': {'size': 954,\n", |
| " 'path': './df.orc',\n", |
| " 'last_modified': 1698716079.065649,\n", |
| " 'timestamp': 1698741279.07081}}" |
| ] |
| }, |
| "execution_count": 11, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Metadata reported by each materializer, keyed by materializer id\n", |
| "# (size/path/timestamps for files, row count and table name for SQL).\n", |
| "materialization_results" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 12, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.051151Z", |
| "start_time": "2023-09-17T05:43:34.043320Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/html": [ |
| "<div>\n", |
| "<style scoped>\n", |
| " .dataframe tbody tr th:only-of-type {\n", |
| " vertical-align: middle;\n", |
| " }\n", |
| "\n", |
| " .dataframe tbody tr th {\n", |
| " vertical-align: top;\n", |
| " }\n", |
| "\n", |
| " .dataframe thead th {\n", |
| " text-align: right;\n", |
| " }\n", |
| "</style>\n", |
| "<table border=\"1\" class=\"dataframe\">\n", |
| " <thead>\n", |
| " <tr style=\"text-align: right;\">\n", |
| " <th></th>\n", |
| " <th>spend</th>\n", |
| " <th>signups</th>\n", |
| " <th>avg_3wk_spend</th>\n", |
| " <th>spend_per_signup</th>\n", |
| " <th>spend_zero_mean_unit_variance</th>\n", |
| " </tr>\n", |
| " </thead>\n", |
| " <tbody>\n", |
| " <tr>\n", |
| " <th>0</th>\n", |
| " <td>10</td>\n", |
| " <td>1</td>\n", |
| " <td>NaN</td>\n", |
| " <td>10.000</td>\n", |
| " <td>-1.064405</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>1</th>\n", |
| " <td>10</td>\n", |
| " <td>10</td>\n", |
| " <td>NaN</td>\n", |
| " <td>1.000</td>\n", |
| " <td>-1.064405</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>2</th>\n", |
| " <td>20</td>\n", |
| " <td>50</td>\n", |
| " <td>13.333333</td>\n", |
| " <td>0.400</td>\n", |
| " <td>-0.483821</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>3</th>\n", |
| " <td>40</td>\n", |
| " <td>100</td>\n", |
| " <td>23.333333</td>\n", |
| " <td>0.400</td>\n", |
| " <td>0.677349</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>4</th>\n", |
| " <td>40</td>\n", |
| " <td>200</td>\n", |
| " <td>33.333333</td>\n", |
| " <td>0.200</td>\n", |
| " <td>0.677349</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>5</th>\n", |
| " <td>50</td>\n", |
| " <td>400</td>\n", |
| " <td>43.333333</td>\n", |
| " <td>0.125</td>\n", |
| " <td>1.257934</td>\n", |
| " </tr>\n", |
| " </tbody>\n", |
| "</table>\n", |
| "</div>" |
| ], |
| "text/plain": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 " |
| ] |
| }, |
| "execution_count": 12, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame that was fed to the `df_to_pickle` materializer.\n", |
| "additional_outputs[\"df_to_pickle_build_result\"]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 13, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.058608Z", |
| "start_time": "2023-09-17T05:43:34.048662Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/html": [ |
| "<div>\n", |
| "<style scoped>\n", |
| " .dataframe tbody tr th:only-of-type {\n", |
| " vertical-align: middle;\n", |
| " }\n", |
| "\n", |
| " .dataframe tbody tr th {\n", |
| " vertical-align: top;\n", |
| " }\n", |
| "\n", |
| " .dataframe thead th {\n", |
| " text-align: right;\n", |
| " }\n", |
| "</style>\n", |
| "<table border=\"1\" class=\"dataframe\">\n", |
| " <thead>\n", |
| " <tr style=\"text-align: right;\">\n", |
| " <th></th>\n", |
| " <th>spend</th>\n", |
| " <th>signups</th>\n", |
| " <th>avg_3wk_spend</th>\n", |
| " <th>spend_per_signup</th>\n", |
| " <th>spend_zero_mean_unit_variance</th>\n", |
| " </tr>\n", |
| " </thead>\n", |
| " <tbody>\n", |
| " <tr>\n", |
| " <th>0</th>\n", |
| " <td>10</td>\n", |
| " <td>1</td>\n", |
| " <td>NaN</td>\n", |
| " <td>10.000</td>\n", |
| " <td>-1.064405</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>1</th>\n", |
| " <td>10</td>\n", |
| " <td>10</td>\n", |
| " <td>NaN</td>\n", |
| " <td>1.000</td>\n", |
| " <td>-1.064405</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>2</th>\n", |
| " <td>20</td>\n", |
| " <td>50</td>\n", |
| " <td>13.333333</td>\n", |
| " <td>0.400</td>\n", |
| " <td>-0.483821</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>3</th>\n", |
| " <td>40</td>\n", |
| " <td>100</td>\n", |
| " <td>23.333333</td>\n", |
| " <td>0.400</td>\n", |
| " <td>0.677349</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>4</th>\n", |
| " <td>40</td>\n", |
| " <td>200</td>\n", |
| " <td>33.333333</td>\n", |
| " <td>0.200</td>\n", |
| " <td>0.677349</td>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <th>5</th>\n", |
| " <td>50</td>\n", |
| " <td>400</td>\n", |
| " <td>43.333333</td>\n", |
| " <td>0.125</td>\n", |
| " <td>1.257934</td>\n", |
| " </tr>\n", |
| " </tbody>\n", |
| "</table>\n", |
| "</div>" |
| ], |
| "text/plain": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 " |
| ] |
| }, |
| "execution_count": 13, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame that was fed to the `df_to_json` materializer.\n", |
| "additional_outputs[\"df_to_json_build_result\"]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 14, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.089706Z", |
| "start_time": "2023-09-17T05:43:34.060251Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame that was fed to the `df_to_sql` materializer.\n", |
| "# NOTE(review): print() shows the plain-text repr; a bare expression, as in the\n", |
| "# pickle and JSON cells, would render the HTML table instead.\n", |
| "print(additional_outputs[\"df_to_sql_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 15, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame that was fed to the `df_to_xml` materializer.\n", |
| "print(additional_outputs[\"df_to_xml_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 16, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame produced for the `df_to_html` materializer.\n", |
| "print(additional_outputs[\"df_to_html_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 17, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame produced for the `df_to_stata` materializer.\n", |
| "print(additional_outputs[\"df_to_stata_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 18, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Combined DataFrame that was fed to the `df_to_feather` materializer.\n", |
| "print(additional_outputs[\"df_to_feather_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 19, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "print(additional_outputs[\"df_to_parquet_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 20, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "print(additional_outputs[\"df_to_csv_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 21, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup \\\n", |
| "0 10 1 NaN 10.000 \n", |
| "1 10 10 NaN 1.000 \n", |
| "2 20 50 13.333333 0.400 \n", |
| "3 40 100 23.333333 0.400 \n", |
| "4 40 200 33.333333 0.200 \n", |
| "5 50 400 43.333333 0.125 \n", |
| "\n", |
| " spend_zero_mean_unit_variance \n", |
| "0 -1.064405 \n", |
| "1 -1.064405 \n", |
| "2 -0.483821 \n", |
| "3 0.677349 \n", |
| "4 0.677349 \n", |
| "5 1.257934 \n" |
| ] |
| } |
| ], |
| "source": [ |
| "print(additional_outputs[\"df_to_orc_build_result\"])" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 21, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-09-17T05:43:34.090208Z", |
| "start_time": "2023-09-17T05:43:34.066483Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# Close the database connection now that the notebook is finished with it.\n", |
| "conn.close()" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.10.12" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 4 |
| } |