blob: e652de8d2c82739e5444d6cde87bac115bfd7a56 [file]
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:33.343763Z",
"start_time": "2023-10-05T16:36:33.337662Z"
}
},
"outputs": [],
"source": [
"# update the polars package - optional\n",
"# !pip install --upgrade polars"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:35.401649Z",
"start_time": "2023-10-05T16:36:33.838727Z"
}
},
"outputs": [],
"source": [
"import polars as pl\n",
"\n",
"# Add the hamilton module to your path - optinal\n",
"# project_dir = \"### ADD PATH HERE ###\"\n",
"# sys.path.append(project_dir)\n",
"\n",
"from hamilton import driver\n",
"from hamilton.io.materialization import to\n",
"from hamilton.plugins import h_polars"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:35.465307Z",
"start_time": "2023-10-05T16:36:35.406399Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# We use the autoreload extension that comes with ipython to automatically reload modules when\n",
"# the code in them changes.\n",
"\n",
"# import the jupyter extension\n",
"%load_ext autoreload\n",
"# set it to only reload the modules imported\n",
"%autoreload 1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:36.389233Z",
"start_time": "2023-10-05T16:36:36.380788Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting spend_calculations.py\n"
]
}
],
"source": [
"%%writefile spend_calculations.py\n",
"# Define your new Hamilton functions.\n",
"# The %%writefile magic command creates a new Python module with the functions below.\n",
"# We will import this later and pass it into our Driver.\n",
"\n",
"import polars as pl\n",
"\n",
"# Look at `my_functions` to see how these functions connect.\n",
"def avg_3wk_spend(spend: pl.Series) -> pl.Series:\n",
" \"\"\"Rolling 3 week average spend.\"\"\"\n",
" return spend.rolling_mean(3)\n",
"\n",
"\n",
"def spend_per_signup(spend: pl.Series, signups: pl.Series) -> pl.Series:\n",
" \"\"\"The cost per signup in relation to spend.\"\"\"\n",
" return spend / signups\n",
"\n",
"\n",
"def spend_mean(spend: pl.Series) -> float:\n",
" \"\"\"Shows function creating a scalar. In this case it computes the mean of the entire column.\"\"\"\n",
" return spend.mean()\n",
"\n",
"\n",
"def spend_zero_mean(spend: pl.Series, spend_mean: float) -> pl.Series:\n",
" \"\"\"Shows function that takes a scalar. In this case to zero mean spend.\"\"\"\n",
" return spend - spend_mean\n",
"\n",
"\n",
"def spend_std_dev(spend: pl.Series) -> float:\n",
" \"\"\"Function that computes the standard deviation of the spend column.\"\"\"\n",
" return spend.std()\n",
"\n",
"\n",
"def spend_zero_mean_unit_variance(spend_zero_mean: pl.Series, spend_std_dev: float) -> pl.Series:\n",
" \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n",
" return spend_zero_mean / spend_std_dev"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:36.947063Z",
"start_time": "2023-10-05T16:36:36.938015Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"initial_columns = { # load from actuals or wherever -- this is our initial data we use as input.\n",
" # Note: these values don't have to be all series, they could be a scalar.\n",
" \"signups\": pl.Series([1, 10, 50, 100, 200, 400]),\n",
" \"spend\": pl.Series([10, 10, 20, 40, 40, 50]),\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:37.496013Z",
"start_time": "2023-10-05T16:36:37.447521Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n"
]
}
],
"source": [
"%aimport spend_calculations\n",
"\n",
"df_builder = h_polars.PolarsDataFrameResult()\n",
"dr = driver.Driver({}, spend_calculations) # can pass in multiple modules"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:38.178430Z",
"start_time": "2023-10-05T16:36:38.162359Z"
}
},
"outputs": [],
"source": [
"# we need to specify what we want in the final dataframe. These can be string names, or function references.\n",
"output_columns = [\n",
" \"spend\",\n",
" \"signups\",\n",
" \"avg_3wk_spend\",\n",
" \"spend_per_signup\",\n",
" \"spend_zero_mean_unit_variance\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:39.212897Z",
"start_time": "2023-10-05T16:36:39.202499Z"
},
"collapsed": false
},
"outputs": [],
"source": [
"# pass"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:39.908587Z",
"start_time": "2023-10-05T16:36:39.637374Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 8.1.0 (20230707.0739)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"900pt\" height=\"404pt\"\n",
" viewBox=\"0.00 0.00 899.92 404.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 400)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-400 895.92,-400 895.92,4 -4,4\"/>\n",
"<!-- df_to_json -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>df_to_json</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"135.82,-36 62.07,-36 62.07,0 135.82,0 135.82,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"98.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json</text>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>spend_zero_mean_unit_variance</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"359.2,-180 164.7,-180 164.7,-144 359.2,-144 359.2,-180\"/>\n",
"<text text-anchor=\"middle\" x=\"261.95\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean_unit_variance</text>\n",
"</g>\n",
"<!-- df_to_avro_build_result -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>df_to_avro_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"791.95\" cy=\"-90\" rx=\"99.97\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"791.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_avro_build_result</text>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_avro_build_result -->\n",
"<g id=\"edge12\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_avro_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M359.46,-149.55C444.86,-139.33 572.25,-123.6 682.95,-108 691.15,-106.84 699.69,-105.59 708.22,-104.31\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"708.63,-107.64 718,-102.68 707.59,-100.72 708.63,-107.64\"/>\n",
"</g>\n",
"<!-- df_to_json_build_result -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>df_to_json_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"98.95\" cy=\"-90\" rx=\"98.95\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"98.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json_build_result</text>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge17\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M221.24,-143.52C198.47,-133.74 169.97,-121.5 146.21,-111.3\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"147.92,-107.79 137.35,-107.06 145.16,-114.23 147.92,-107.79\"/>\n",
"</g>\n",
"<!-- df_to_feather_build_result -->\n",
"<g id=\"node10\" class=\"node\">\n",
"<title>df_to_feather_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"324.95\" cy=\"-90\" rx=\"109.19\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"324.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather_build_result</text>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge25\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M277.52,-143.7C285,-135.39 294.1,-125.28 302.32,-116.14\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"304.47,-118.88 308.56,-109.1 299.26,-114.2 304.47,-118.88\"/>\n",
"</g>\n",
"<!-- df_to_parquet_build_result -->\n",
"<g id=\"node13\" class=\"node\">\n",
"<title>df_to_parquet_build_result</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"562.95\" cy=\"-90\" rx=\"111.23\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"562.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet_build_result</text>\n",
"</g>\n",
"<!-- spend_zero_mean_unit_variance&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge31\" class=\"edge\">\n",
"<title>spend_zero_mean_unit_variance&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M337.13,-143.52C384.14,-132.58 444.36,-118.58 490.79,-107.78\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"491.43,-110.99 500.37,-105.32 489.84,-104.18 491.43,-110.99\"/>\n",
"</g>\n",
"<!-- spend_zero_mean -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>spend_zero_mean</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"371.95\" cy=\"-234\" rx=\"77.97\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"371.95\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean</text>\n",
"</g>\n",
"<!-- spend_zero_mean&#45;&gt;spend_zero_mean_unit_variance -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>spend_zero_mean&#45;&gt;spend_zero_mean_unit_variance</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M346.16,-216.59C331.83,-207.47 313.77,-195.97 298.05,-185.97\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"300.43,-182.7 290.11,-180.29 296.67,-188.61 300.43,-182.7\"/>\n",
"</g>\n",
"<!-- df_to_feather -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>df_to_feather</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"369.32,-36 280.57,-36 280.57,0 369.32,0 369.32,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"324.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather</text>\n",
"</g>\n",
"<!-- avg_3wk_spend -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>avg_3wk_spend</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"650.82,-180 547.07,-180 547.07,-144 650.82,-144 650.82,-180\"/>\n",
"<text text-anchor=\"middle\" x=\"598.95\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">avg_3wk_spend</text>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_avro_build_result -->\n",
"<g id=\"edge10\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_avro_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M647.15,-143.52C675,-133.42 710.07,-120.7 738.72,-110.31\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"739.75,-113.29 747.96,-106.59 737.37,-106.71 739.75,-113.29\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge15\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M546.84,-145.87C543.84,-145.19 540.86,-144.56 537.95,-144 392.64,-116.04 353.64,-127.51 206.95,-108 198.68,-106.9 190.07,-105.68 181.47,-104.41\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"182.03,-100.8 171.63,-102.77 180.99,-107.72 182.03,-100.8\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge23\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M546.73,-146.34C543.76,-145.54 540.82,-144.76 537.95,-144 490.49,-131.49 436.88,-118.15 395.31,-107.98\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"396.29,-104.38 385.74,-105.41 394.63,-111.18 396.29,-104.38\"/>\n",
"</g>\n",
"<!-- avg_3wk_spend&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge29\" class=\"edge\">\n",
"<title>avg_3wk_spend&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M590.05,-143.7C586.04,-135.9 581.21,-126.51 576.75,-117.83\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"579.43,-116.4 571.75,-109.1 573.21,-119.6 579.43,-116.4\"/>\n",
"</g>\n",
"<!-- df_to_avro -->\n",
"<g id=\"node16\" class=\"node\">\n",
"<title>df_to_avro</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"829.57,-36 754.32,-36 754.32,0 829.57,0 829.57,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"791.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_avro</text>\n",
"</g>\n",
"<!-- df_to_avro_build_result&#45;&gt;df_to_avro -->\n",
"<g id=\"edge33\" class=\"edge\">\n",
"<title>df_to_avro_build_result&#45;&gt;df_to_avro</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M791.95,-71.7C791.95,-64.24 791.95,-55.32 791.95,-46.97\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"795.45,-47.1 791.95,-37.1 788.45,-47.1 795.45,-47.1\"/>\n",
"</g>\n",
"<!-- df_to_json_build_result&#45;&gt;df_to_json -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>df_to_json_build_result&#45;&gt;df_to_json</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M98.95,-71.7C98.95,-64.24 98.95,-55.32 98.95,-46.97\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"102.45,-47.1 98.95,-37.1 95.45,-47.1 102.45,-47.1\"/>\n",
"</g>\n",
"<!-- df_to_parquet -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>df_to_parquet</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"608.82,-36 517.07,-36 517.07,0 608.82,0 608.82,-36\"/>\n",
"<text text-anchor=\"middle\" x=\"562.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet</text>\n",
"</g>\n",
"<!-- spend_per_signup -->\n",
"<g id=\"node9\" class=\"node\">\n",
"<title>spend_per_signup</title>\n",
"<polygon fill=\"none\" stroke=\"black\" points=\"529.07,-180 414.82,-180 414.82,-144 529.07,-144 529.07,-180\"/>\n",
"<text text-anchor=\"middle\" x=\"471.95\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_per_signup</text>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_avro_build_result -->\n",
"<g id=\"edge11\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_avro_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M529.5,-146.01C532.36,-145.32 535.18,-144.64 537.95,-144 597.34,-130.16 664.94,-116.14 715.47,-106\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"715.86,-109.29 724.98,-103.89 714.49,-102.42 715.86,-109.29\"/>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge16\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M414.49,-150.22C350.73,-138.25 247.51,-118.88 176.6,-105.57\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"177.42,-101.98 166.94,-103.57 176.13,-108.86 177.42,-101.98\"/>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge24\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M435.23,-143.52C415.27,-134.01 390.42,-122.18 369.36,-112.15\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"371.22,-108.68 360.68,-107.54 368.21,-115 371.22,-108.68\"/>\n",
"</g>\n",
"<!-- spend_per_signup&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge30\" class=\"edge\">\n",
"<title>spend_per_signup&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M494.44,-143.7C505.79,-134.97 519.74,-124.24 532.07,-114.75\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"533.92,-116.98 539.71,-108.1 529.65,-111.43 533.92,-116.98\"/>\n",
"</g>\n",
"<!-- df_to_feather_build_result&#45;&gt;df_to_feather -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>df_to_feather_build_result&#45;&gt;df_to_feather</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M324.95,-71.7C324.95,-64.24 324.95,-55.32 324.95,-46.97\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"328.45,-47.1 324.95,-37.1 321.45,-47.1 328.45,-47.1\"/>\n",
"</g>\n",
"<!-- signups -->\n",
"<g id=\"node11\" class=\"node\">\n",
"<title>signups</title>\n",
"<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"607.57,-252 514.32,-252 514.32,-216 607.57,-216 607.57,-252\"/>\n",
"<text text-anchor=\"middle\" x=\"560.95\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups</text>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_avro_build_result -->\n",
"<g id=\"edge9\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_avro_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M605.4,-215.54C627.52,-206.14 654.3,-193.69 676.95,-180 708.95,-160.66 742.4,-133.92 764.93,-114.8\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"766.69,-117.05 772,-107.88 762.13,-111.74 766.69,-117.05\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge14\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M514.05,-224.71C496.76,-221.78 476.99,-218.57 458.95,-216 391.82,-206.42 213.41,-216 155.95,-180 133.31,-165.81 117.98,-138.97 108.97,-118.46\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"111.88,-117.32 104.83,-109.41 105.41,-119.99 111.88,-117.32\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;spend_per_signup -->\n",
"<g id=\"edge20\" class=\"edge\">\n",
"<title>signups&#45;&gt;spend_per_signup</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M538.95,-215.7C527.93,-207.03 514.42,-196.4 502.42,-186.96\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"505.03,-183.78 495,-180.35 500.7,-189.28 505.03,-183.78\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge22\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M514.12,-223.5C481.85,-215.46 439,-201.7 405.95,-180 392.24,-171 364.9,-139.58 345.79,-116.62\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"347.98,-114.78 338.91,-109.3 342.58,-119.24 347.98,-114.78\"/>\n",
"</g>\n",
"<!-- signups&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge28\" class=\"edge\">\n",
"<title>signups&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M608,-219.77C627.65,-211.66 648.53,-199.07 659.95,-180 668.17,-166.27 668.11,-157.76 659.95,-144 651.52,-129.78 637.78,-119.13 623.26,-111.26\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"625.16,-107.8 614.65,-106.48 622.04,-114.07 625.16,-107.8\"/>\n",
"</g>\n",
"<!-- spend_std_dev -->\n",
"<g id=\"node12\" class=\"node\">\n",
"<title>spend_std_dev</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"230.95\" cy=\"-306\" rx=\"65.68\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"230.95\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_std_dev</text>\n",
"</g>\n",
"<!-- spend_std_dev&#45;&gt;spend_zero_mean_unit_variance -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>spend_std_dev&#45;&gt;spend_zero_mean_unit_variance</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M234.75,-287.59C240.03,-263.4 249.64,-219.36 255.91,-190.65\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"259.5,-191.63 258.21,-181.11 252.66,-190.13 259.5,-191.63\"/>\n",
"</g>\n",
"<!-- df_to_parquet_build_result&#45;&gt;df_to_parquet -->\n",
"<g id=\"edge18\" class=\"edge\">\n",
"<title>df_to_parquet_build_result&#45;&gt;df_to_parquet</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M562.95,-71.7C562.95,-64.24 562.95,-55.32 562.95,-46.97\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"566.45,-47.1 562.95,-37.1 559.45,-47.1 566.45,-47.1\"/>\n",
"</g>\n",
"<!-- spend -->\n",
"<g id=\"node14\" class=\"node\">\n",
"<title>spend</title>\n",
"<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"498.7,-396 415.2,-396 415.2,-360 498.7,-360 498.7,-396\"/>\n",
"<text text-anchor=\"middle\" x=\"456.95\" y=\"-372.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_zero_mean -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_zero_mean</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M456.64,-359.67C455.57,-340.75 451.63,-310.19 437.95,-288 430.59,-276.06 419.54,-265.65 408.57,-257.29\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"411.04,-254.03 400.87,-251.04 406.95,-259.72 411.04,-254.03\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;avg_3wk_spend -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>spend&#45;&gt;avg_3wk_spend</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M495.7,-359.58C534.49,-339.75 591.92,-303.33 616.95,-252 626.42,-232.58 620.59,-208.43 613.12,-190.19\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"615.93,-188.91 608.64,-181.22 609.55,-191.77 615.93,-188.91\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_avro_build_result -->\n",
"<g id=\"edge8\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_avro_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M499.09,-370.54C533.83,-363.59 583.05,-349.85 618.95,-324 696.84,-267.9 754.61,-166.25 779.07,-117.92\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"782.6,-119.7 783.91,-109.18 776.33,-116.58 782.6,-119.7\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_json_build_result -->\n",
"<g id=\"edge13\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_json_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M414.71,-376C335.49,-373.21 168.98,-362.78 128.95,-324 73.37,-270.16 83.69,-168.15 92.76,-118.94\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"96.33,-119.94 94.83,-109.45 89.46,-118.58 96.33,-119.94\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_per_signup -->\n",
"<g id=\"edge19\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_per_signup</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M458.15,-359.85C460.74,-322.99 466.84,-235.92 469.99,-190.96\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"473.53,-191.45 470.74,-181.23 466.55,-190.96 473.53,-191.45\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_feather_build_result -->\n",
"<g id=\"edge21\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_feather_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M414.89,-375.99C340.05,-373.21 188.53,-362.81 155.95,-324 104.52,-262.72 108.6,-208.48 155.95,-144 167.14,-128.77 206.56,-115.94 244.51,-106.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"245.22,-109.92 254.15,-104.22 243.62,-103.1 245.22,-109.92\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_std_dev -->\n",
"<g id=\"edge26\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_std_dev</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M414.71,-363.92C377.65,-352.44 323.6,-335.7 283.7,-323.34\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"284.96,-319.75 274.37,-320.14 282.89,-326.44 284.96,-319.75\"/>\n",
"</g>\n",
"<!-- spend&#45;&gt;df_to_parquet_build_result -->\n",
"<g id=\"edge27\" class=\"edge\">\n",
"<title>spend&#45;&gt;df_to_parquet_build_result</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M499.08,-366.97C526.39,-358.85 561.48,-345.22 586.95,-324 644.93,-275.69 656.51,-252.65 676.95,-180 681.28,-164.6 685.55,-157.49 676.95,-144 667.04,-128.46 651.27,-117.41 634.58,-109.57\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"636.25,-106.05 625.68,-105.32 633.49,-112.48 636.25,-106.05\"/>\n",
"</g>\n",
"<!-- spend_mean -->\n",
"<g id=\"node15\" class=\"node\">\n",
"<title>spend_mean</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"371.95\" cy=\"-306\" rx=\"57.49\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"371.95\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_mean</text>\n",
"</g>\n",
"<!-- spend&#45;&gt;spend_mean -->\n",
"<g id=\"edge32\" class=\"edge\">\n",
"<title>spend&#45;&gt;spend_mean</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M435.94,-359.7C425.02,-350.7 411.53,-339.6 399.76,-329.9\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"402.5,-326.8 392.55,-323.14 398.05,-332.2 402.5,-326.8\"/>\n",
"</g>\n",
"<!-- spend_mean&#45;&gt;spend_zero_mean -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>spend_mean&#45;&gt;spend_zero_mean</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M371.95,-287.7C371.95,-280.24 371.95,-271.32 371.95,-262.97\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"375.45,-263.1 371.95,-253.1 368.45,-263.1 375.45,-263.1\"/>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x123b620e0>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"materializers = [\n",
" # materialize the dataframe to a parquet file\n",
" to.parquet(\n",
" dependencies=output_columns,\n",
" id=\"df_to_parquet\",\n",
" file=\"./df.parquet\",\n",
" combine=df_builder,\n",
" ),\n",
" # materialize the dataframe to a feather file\n",
" to.feather(\n",
" dependencies=output_columns,\n",
" id=\"df_to_feather\",\n",
" file=\"./df.feather\",\n",
" combine=df_builder,\n",
" ),\n",
" # materialize the dataframe to a json file\n",
" to.json(\n",
" dependencies=output_columns,\n",
" id=\"df_to_json\",\n",
" file=\"./df.json\",\n",
" combine=df_builder,\n",
" ),\n",
" # materialize the dataframe to an avro file\n",
" to.avro(\n",
" dependencies=output_columns,\n",
" id=\"df_to_avro\",\n",
" file=\"./df.avro\",\n",
" combine=df_builder,\n",
" ),\n",
"]\n",
"# Visualize what is happening\n",
"dr.visualize_materialization(\n",
" *materializers,\n",
" additional_vars=output_columns,\n",
" inputs=initial_columns,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:56.565384Z",
"start_time": "2023-10-05T16:36:56.531421Z"
}
},
"outputs": [],
"source": [
"# Materialize a result, i.e. execute the DAG!\n",
"materialization_results, additional_outputs = dr.materialize(\n",
" *materializers,\n",
" additional_vars=[\n",
" \"df_to_parquet_build_result\",\n",
" \"df_to_feather_build_result\",\n",
" \"df_to_json_build_result\",\n",
" \"df_to_avro_build_result\",\n",
" ], # because combine is used, we can get that result here.\n",
" inputs=initial_columns,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:57.122715Z",
"start_time": "2023-10-05T16:36:57.101770Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'df_to_parquet': {'size': 1605,\n",
" 'path': './df.parquet',\n",
" 'last_modified': 1696820306.5002773,\n",
" 'timestamp': 1696800506.500361},\n",
" 'df_to_feather': {'size': 1696,\n",
" 'path': './df.feather',\n",
" 'last_modified': 1696820306.5006804,\n",
" 'timestamp': 1696800506.500732},\n",
" 'df_to_json': {'size': 657,\n",
" 'path': './df.json',\n",
" 'last_modified': 1696820306.5011163,\n",
" 'timestamp': 1696800506.501182},\n",
" 'df_to_avro': {'size': 517,\n",
" 'path': './df.avro',\n",
" 'last_modified': 1696820306.5013201,\n",
" 'timestamp': 1696800506.501356}}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"materialization_results"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:57.826266Z",
"start_time": "2023-10-05T16:36:57.805459Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (6, 5)\n",
"┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n",
"│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n",
"│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n",
"│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n",
"│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n",
"│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n",
"│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n",
"│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n",
"└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"additional_outputs[\"df_to_parquet_build_result\"]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:36:58.919203Z",
"start_time": "2023-10-05T16:36:58.869761Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (6, 5)\n",
"┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n",
"│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n",
"│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n",
"│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n",
"│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n",
"│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n",
"│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n",
"│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n",
"└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"additional_outputs[\"df_to_feather_build_result\"]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-05T16:37:01.766669Z",
"start_time": "2023-10-05T16:37:01.705286Z"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (6, 5)\n",
"┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n",
"│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n",
"│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n",
"│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n",
"│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n",
"│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n",
"│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n",
"│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n",
"└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"additional_outputs[\"df_to_json_build_result\"]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (6, 5)\n",
"┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n",
"│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n",
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
"│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n",
"╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n",
"│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n",
"│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n",
"│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n",
"│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n",
"│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n",
"│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n",
"└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"additional_outputs[\"df_to_avro_build_result\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}