| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:33.343763Z", |
| "start_time": "2023-10-05T16:36:33.337662Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# update the polars package - optional\n", |
| "# !pip install --upgrade polars" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:35.401649Z", |
| "start_time": "2023-10-05T16:36:33.838727Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "import polars as pl\n", |
| "\n", |
| "# Add the hamilton module to your path - optional\n", |
| "# project_dir = \"### ADD PATH HERE ###\"\n", |
| "# sys.path.append(project_dir)\n", |
| "\n", |
| "from hamilton import driver\n", |
| "from hamilton.io.materialization import to\n", |
| "from hamilton.plugins import h_polars" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:35.465307Z", |
| "start_time": "2023-10-05T16:36:35.406399Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# We use the autoreload extension that comes with ipython to automatically reload modules when\n", |
| "# the code in them changes.\n", |
| "\n", |
| "# import the jupyter extension\n", |
| "%load_ext autoreload\n", |
| "# set it to only reload modules registered with %aimport\n", |
| "%autoreload 1" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:36.389233Z", |
| "start_time": "2023-10-05T16:36:36.380788Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Overwriting spend_calculations.py\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%%writefile spend_calculations.py\n", |
| "# Define your new Hamilton functions.\n", |
| "# The %%writefile magic command creates a new Python module with the functions below.\n", |
| "# We will import this later and pass it into our Driver.\n", |
| "\n", |
| "import polars as pl\n", |
| "\n", |
| "# Look at `my_functions` to see how these functions connect.\n", |
| "def avg_3wk_spend(spend: pl.Series) -> pl.Series:\n", |
| " \"\"\"Rolling 3 week average spend.\"\"\"\n", |
| " return spend.rolling_mean(3)\n", |
| "\n", |
| "\n", |
| "def spend_per_signup(spend: pl.Series, signups: pl.Series) -> pl.Series:\n", |
| " \"\"\"The cost per signup in relation to spend.\"\"\"\n", |
| " return spend / signups\n", |
| "\n", |
| "\n", |
| "def spend_mean(spend: pl.Series) -> float:\n", |
| " \"\"\"Shows function creating a scalar. In this case it computes the mean of the entire column.\"\"\"\n", |
| " return spend.mean()\n", |
| "\n", |
| "\n", |
| "def spend_zero_mean(spend: pl.Series, spend_mean: float) -> pl.Series:\n", |
| " \"\"\"Shows function that takes a scalar. In this case to zero mean spend.\"\"\"\n", |
| " return spend - spend_mean\n", |
| "\n", |
| "\n", |
| "def spend_std_dev(spend: pl.Series) -> float:\n", |
| " \"\"\"Function that computes the standard deviation of the spend column.\"\"\"\n", |
| " return spend.std()\n", |
| "\n", |
| "\n", |
| "def spend_zero_mean_unit_variance(spend_zero_mean: pl.Series, spend_std_dev: float) -> pl.Series:\n", |
| " \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n", |
| " return spend_zero_mean / spend_std_dev" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:36.947063Z", |
| "start_time": "2023-10-05T16:36:36.938015Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "initial_columns = { # load from actuals or wherever -- this is our initial data we use as input.\n", |
| " # Note: these values don't have to be all series, they could be a scalar.\n", |
| " \"signups\": pl.Series([1, 10, 50, 100, 200, 400]),\n", |
| " \"spend\": pl.Series([10, 10, 20, 40, 40, 50]),\n", |
| "}" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:37.496013Z", |
| "start_time": "2023-10-05T16:36:37.447521Z" |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%aimport spend_calculations\n", |
| "\n", |
| "df_builder = h_polars.PolarsDataFrameResult()\n", |
| "dr = driver.Driver({}, spend_calculations) # can pass in multiple modules" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:38.178430Z", |
| "start_time": "2023-10-05T16:36:38.162359Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# we need to specify what we want in the final dataframe. These can be string names, or function references.\n", |
| "output_columns = [\n", |
| " \"spend\",\n", |
| " \"signups\",\n", |
| " \"avg_3wk_spend\",\n", |
| " \"spend_per_signup\",\n", |
| " \"spend_zero_mean_unit_variance\",\n", |
| "]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 8, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:39.212897Z", |
| "start_time": "2023-10-05T16:36:39.202499Z" |
| }, |
| "collapsed": false |
| }, |
| "outputs": [], |
| "source": [ |
| "# pass" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:39.908587Z", |
| "start_time": "2023-10-05T16:36:39.637374Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 8.1.0 (20230707.0739)\n", |
| " -->\n", |
| "<!-- Pages: 1 -->\n", |
| "<svg width=\"900pt\" height=\"404pt\"\n", |
| " viewBox=\"0.00 0.00 899.92 404.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 400)\">\n", |
| "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-400 895.92,-400 895.92,4 -4,4\"/>\n", |
| "<!-- df_to_json -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>df_to_json</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"135.82,-36 62.07,-36 62.07,0 135.82,0 135.82,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"98.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>spend_zero_mean_unit_variance</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"359.2,-180 164.7,-180 164.7,-144 359.2,-144 359.2,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"261.95\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean_unit_variance</text>\n", |
| "</g>\n", |
| "<!-- df_to_avro_build_result -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>df_to_avro_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"791.95\" cy=\"-90\" rx=\"99.97\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"791.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_avro_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_avro_build_result -->\n", |
| "<g id=\"edge12\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_avro_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M359.46,-149.55C444.86,-139.33 572.25,-123.6 682.95,-108 691.15,-106.84 699.69,-105.59 708.22,-104.31\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"708.63,-107.64 718,-102.68 707.59,-100.72 708.63,-107.64\"/>\n", |
| "</g>\n", |
| "<!-- df_to_json_build_result -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>df_to_json_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"98.95\" cy=\"-90\" rx=\"98.95\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"98.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_json_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_json_build_result -->\n", |
| "<g id=\"edge17\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M221.24,-143.52C198.47,-133.74 169.97,-121.5 146.21,-111.3\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"147.92,-107.79 137.35,-107.06 145.16,-114.23 147.92,-107.79\"/>\n", |
| "</g>\n", |
| "<!-- df_to_feather_build_result -->\n", |
| "<g id=\"node10\" class=\"node\">\n", |
| "<title>df_to_feather_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"324.95\" cy=\"-90\" rx=\"109.19\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"324.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_feather_build_result -->\n", |
| "<g id=\"edge25\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M277.52,-143.7C285,-135.39 294.1,-125.28 302.32,-116.14\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"304.47,-118.88 308.56,-109.1 299.26,-114.2 304.47,-118.88\"/>\n", |
| "</g>\n", |
| "<!-- df_to_parquet_build_result -->\n", |
| "<g id=\"node13\" class=\"node\">\n", |
| "<title>df_to_parquet_build_result</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"562.95\" cy=\"-90\" rx=\"111.23\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"562.95\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet_build_result</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance->df_to_parquet_build_result -->\n", |
| "<g id=\"edge31\" class=\"edge\">\n", |
| "<title>spend_zero_mean_unit_variance->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M337.13,-143.52C384.14,-132.58 444.36,-118.58 490.79,-107.78\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"491.43,-110.99 500.37,-105.32 489.84,-104.18 491.43,-110.99\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>spend_zero_mean</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"371.95\" cy=\"-234\" rx=\"77.97\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"371.95\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean->spend_zero_mean_unit_variance -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>spend_zero_mean->spend_zero_mean_unit_variance</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M346.16,-216.59C331.83,-207.47 313.77,-195.97 298.05,-185.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"300.43,-182.7 290.11,-180.29 296.67,-188.61 300.43,-182.7\"/>\n", |
| "</g>\n", |
| "<!-- df_to_feather -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>df_to_feather</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"369.32,-36 280.57,-36 280.57,0 369.32,0 369.32,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"324.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_feather</text>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>avg_3wk_spend</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"650.82,-180 547.07,-180 547.07,-144 650.82,-144 650.82,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"598.95\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">avg_3wk_spend</text>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_avro_build_result -->\n", |
| "<g id=\"edge10\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_avro_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M647.15,-143.52C675,-133.42 710.07,-120.7 738.72,-110.31\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"739.75,-113.29 747.96,-106.59 737.37,-106.71 739.75,-113.29\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_json_build_result -->\n", |
| "<g id=\"edge15\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M546.84,-145.87C543.84,-145.19 540.86,-144.56 537.95,-144 392.64,-116.04 353.64,-127.51 206.95,-108 198.68,-106.9 190.07,-105.68 181.47,-104.41\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"182.03,-100.8 171.63,-102.77 180.99,-107.72 182.03,-100.8\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_feather_build_result -->\n", |
| "<g id=\"edge23\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M546.73,-146.34C543.76,-145.54 540.82,-144.76 537.95,-144 490.49,-131.49 436.88,-118.15 395.31,-107.98\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"396.29,-104.38 385.74,-105.41 394.63,-111.18 396.29,-104.38\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend->df_to_parquet_build_result -->\n", |
| "<g id=\"edge29\" class=\"edge\">\n", |
| "<title>avg_3wk_spend->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M590.05,-143.7C586.04,-135.9 581.21,-126.51 576.75,-117.83\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"579.43,-116.4 571.75,-109.1 573.21,-119.6 579.43,-116.4\"/>\n", |
| "</g>\n", |
| "<!-- df_to_avro -->\n", |
| "<g id=\"node16\" class=\"node\">\n", |
| "<title>df_to_avro</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"829.57,-36 754.32,-36 754.32,0 829.57,0 829.57,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"791.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_avro</text>\n", |
| "</g>\n", |
| "<!-- df_to_avro_build_result->df_to_avro -->\n", |
| "<g id=\"edge33\" class=\"edge\">\n", |
| "<title>df_to_avro_build_result->df_to_avro</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M791.95,-71.7C791.95,-64.24 791.95,-55.32 791.95,-46.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"795.45,-47.1 791.95,-37.1 788.45,-47.1 795.45,-47.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_json_build_result->df_to_json -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>df_to_json_build_result->df_to_json</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M98.95,-71.7C98.95,-64.24 98.95,-55.32 98.95,-46.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"102.45,-47.1 98.95,-37.1 95.45,-47.1 102.45,-47.1\"/>\n", |
| "</g>\n", |
| "<!-- df_to_parquet -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>df_to_parquet</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"608.82,-36 517.07,-36 517.07,0 608.82,0 608.82,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"562.95\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">df_to_parquet</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup -->\n", |
| "<g id=\"node9\" class=\"node\">\n", |
| "<title>spend_per_signup</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"529.07,-180 414.82,-180 414.82,-144 529.07,-144 529.07,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"471.95\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_per_signup</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_avro_build_result -->\n", |
| "<g id=\"edge11\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_avro_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M529.5,-146.01C532.36,-145.32 535.18,-144.64 537.95,-144 597.34,-130.16 664.94,-116.14 715.47,-106\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"715.86,-109.29 724.98,-103.89 714.49,-102.42 715.86,-109.29\"/>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_json_build_result -->\n", |
| "<g id=\"edge16\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M414.49,-150.22C350.73,-138.25 247.51,-118.88 176.6,-105.57\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"177.42,-101.98 166.94,-103.57 176.13,-108.86 177.42,-101.98\"/>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_feather_build_result -->\n", |
| "<g id=\"edge24\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M435.23,-143.52C415.27,-134.01 390.42,-122.18 369.36,-112.15\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"371.22,-108.68 360.68,-107.54 368.21,-115 371.22,-108.68\"/>\n", |
| "</g>\n", |
| "<!-- spend_per_signup->df_to_parquet_build_result -->\n", |
| "<g id=\"edge30\" class=\"edge\">\n", |
| "<title>spend_per_signup->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M494.44,-143.7C505.79,-134.97 519.74,-124.24 532.07,-114.75\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"533.92,-116.98 539.71,-108.1 529.65,-111.43 533.92,-116.98\"/>\n", |
| "</g>\n", |
| "<!-- df_to_feather_build_result->df_to_feather -->\n", |
| "<g id=\"edge6\" class=\"edge\">\n", |
| "<title>df_to_feather_build_result->df_to_feather</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M324.95,-71.7C324.95,-64.24 324.95,-55.32 324.95,-46.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"328.45,-47.1 324.95,-37.1 321.45,-47.1 328.45,-47.1\"/>\n", |
| "</g>\n", |
| "<!-- signups -->\n", |
| "<g id=\"node11\" class=\"node\">\n", |
| "<title>signups</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"607.57,-252 514.32,-252 514.32,-216 607.57,-216 607.57,-252\"/>\n", |
| "<text text-anchor=\"middle\" x=\"560.95\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups</text>\n", |
| "</g>\n", |
| "<!-- signups->df_to_avro_build_result -->\n", |
| "<g id=\"edge9\" class=\"edge\">\n", |
| "<title>signups->df_to_avro_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M605.4,-215.54C627.52,-206.14 654.3,-193.69 676.95,-180 708.95,-160.66 742.4,-133.92 764.93,-114.8\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"766.69,-117.05 772,-107.88 762.13,-111.74 766.69,-117.05\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_json_build_result -->\n", |
| "<g id=\"edge14\" class=\"edge\">\n", |
| "<title>signups->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M514.05,-224.71C496.76,-221.78 476.99,-218.57 458.95,-216 391.82,-206.42 213.41,-216 155.95,-180 133.31,-165.81 117.98,-138.97 108.97,-118.46\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"111.88,-117.32 104.83,-109.41 105.41,-119.99 111.88,-117.32\"/>\n", |
| "</g>\n", |
| "<!-- signups->spend_per_signup -->\n", |
| "<g id=\"edge20\" class=\"edge\">\n", |
| "<title>signups->spend_per_signup</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M538.95,-215.7C527.93,-207.03 514.42,-196.4 502.42,-186.96\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"505.03,-183.78 495,-180.35 500.7,-189.28 505.03,-183.78\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_feather_build_result -->\n", |
| "<g id=\"edge22\" class=\"edge\">\n", |
| "<title>signups->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M514.12,-223.5C481.85,-215.46 439,-201.7 405.95,-180 392.24,-171 364.9,-139.58 345.79,-116.62\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"347.98,-114.78 338.91,-109.3 342.58,-119.24 347.98,-114.78\"/>\n", |
| "</g>\n", |
| "<!-- signups->df_to_parquet_build_result -->\n", |
| "<g id=\"edge28\" class=\"edge\">\n", |
| "<title>signups->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M608,-219.77C627.65,-211.66 648.53,-199.07 659.95,-180 668.17,-166.27 668.11,-157.76 659.95,-144 651.52,-129.78 637.78,-119.13 623.26,-111.26\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"625.16,-107.8 614.65,-106.48 622.04,-114.07 625.16,-107.8\"/>\n", |
| "</g>\n", |
| "<!-- spend_std_dev -->\n", |
| "<g id=\"node12\" class=\"node\">\n", |
| "<title>spend_std_dev</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"230.95\" cy=\"-306\" rx=\"65.68\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"230.95\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_std_dev</text>\n", |
| "</g>\n", |
| "<!-- spend_std_dev->spend_zero_mean_unit_variance -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>spend_std_dev->spend_zero_mean_unit_variance</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M234.75,-287.59C240.03,-263.4 249.64,-219.36 255.91,-190.65\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"259.5,-191.63 258.21,-181.11 252.66,-190.13 259.5,-191.63\"/>\n", |
| "</g>\n", |
| "<!-- df_to_parquet_build_result->df_to_parquet -->\n", |
| "<g id=\"edge18\" class=\"edge\">\n", |
| "<title>df_to_parquet_build_result->df_to_parquet</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M562.95,-71.7C562.95,-64.24 562.95,-55.32 562.95,-46.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"566.45,-47.1 562.95,-37.1 559.45,-47.1 566.45,-47.1\"/>\n", |
| "</g>\n", |
| "<!-- spend -->\n", |
| "<g id=\"node14\" class=\"node\">\n", |
| "<title>spend</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"498.7,-396 415.2,-396 415.2,-360 498.7,-360 498.7,-396\"/>\n", |
| "<text text-anchor=\"middle\" x=\"456.95\" y=\"-372.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_zero_mean -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>spend->spend_zero_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M456.64,-359.67C455.57,-340.75 451.63,-310.19 437.95,-288 430.59,-276.06 419.54,-265.65 408.57,-257.29\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"411.04,-254.03 400.87,-251.04 406.95,-259.72 411.04,-254.03\"/>\n", |
| "</g>\n", |
| "<!-- spend->avg_3wk_spend -->\n", |
| "<g id=\"edge7\" class=\"edge\">\n", |
| "<title>spend->avg_3wk_spend</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M495.7,-359.58C534.49,-339.75 591.92,-303.33 616.95,-252 626.42,-232.58 620.59,-208.43 613.12,-190.19\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"615.93,-188.91 608.64,-181.22 609.55,-191.77 615.93,-188.91\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_avro_build_result -->\n", |
| "<g id=\"edge8\" class=\"edge\">\n", |
| "<title>spend->df_to_avro_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M499.09,-370.54C533.83,-363.59 583.05,-349.85 618.95,-324 696.84,-267.9 754.61,-166.25 779.07,-117.92\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"782.6,-119.7 783.91,-109.18 776.33,-116.58 782.6,-119.7\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_json_build_result -->\n", |
| "<g id=\"edge13\" class=\"edge\">\n", |
| "<title>spend->df_to_json_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M414.71,-376C335.49,-373.21 168.98,-362.78 128.95,-324 73.37,-270.16 83.69,-168.15 92.76,-118.94\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"96.33,-119.94 94.83,-109.45 89.46,-118.58 96.33,-119.94\"/>\n", |
| "</g>\n", |
| "<!-- spend->spend_per_signup -->\n", |
| "<g id=\"edge19\" class=\"edge\">\n", |
| "<title>spend->spend_per_signup</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M458.15,-359.85C460.74,-322.99 466.84,-235.92 469.99,-190.96\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"473.53,-191.45 470.74,-181.23 466.55,-190.96 473.53,-191.45\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_feather_build_result -->\n", |
| "<g id=\"edge21\" class=\"edge\">\n", |
| "<title>spend->df_to_feather_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M414.89,-375.99C340.05,-373.21 188.53,-362.81 155.95,-324 104.52,-262.72 108.6,-208.48 155.95,-144 167.14,-128.77 206.56,-115.94 244.51,-106.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"245.22,-109.92 254.15,-104.22 243.62,-103.1 245.22,-109.92\"/>\n", |
| "</g>\n", |
| "<!-- spend->spend_std_dev -->\n", |
| "<g id=\"edge26\" class=\"edge\">\n", |
| "<title>spend->spend_std_dev</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M414.71,-363.92C377.65,-352.44 323.6,-335.7 283.7,-323.34\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"284.96,-319.75 274.37,-320.14 282.89,-326.44 284.96,-319.75\"/>\n", |
| "</g>\n", |
| "<!-- spend->df_to_parquet_build_result -->\n", |
| "<g id=\"edge27\" class=\"edge\">\n", |
| "<title>spend->df_to_parquet_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M499.08,-366.97C526.39,-358.85 561.48,-345.22 586.95,-324 644.93,-275.69 656.51,-252.65 676.95,-180 681.28,-164.6 685.55,-157.49 676.95,-144 667.04,-128.46 651.27,-117.41 634.58,-109.57\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"636.25,-106.05 625.68,-105.32 633.49,-112.48 636.25,-106.05\"/>\n", |
| "</g>\n", |
| "<!-- spend_mean -->\n", |
| "<g id=\"node15\" class=\"node\">\n", |
| "<title>spend_mean</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"371.95\" cy=\"-306\" rx=\"57.49\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"371.95\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_mean</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_mean -->\n", |
| "<g id=\"edge32\" class=\"edge\">\n", |
| "<title>spend->spend_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M435.94,-359.7C425.02,-350.7 411.53,-339.6 399.76,-329.9\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"402.5,-326.8 392.55,-323.14 398.05,-332.2 402.5,-326.8\"/>\n", |
| "</g>\n", |
| "<!-- spend_mean->spend_zero_mean -->\n", |
| "<g id=\"edge5\" class=\"edge\">\n", |
| "<title>spend_mean->spend_zero_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M371.95,-287.7C371.95,-280.24 371.95,-271.32 371.95,-262.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"375.45,-263.1 371.95,-253.1 368.45,-263.1 375.45,-263.1\"/>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x123b620e0>" |
| ] |
| }, |
| "execution_count": 10, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "materializers = [\n", |
| " # materialize the dataframe to a parquet file\n", |
| " to.parquet(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_parquet\",\n", |
| " file=\"./df.parquet\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a feather file\n", |
| " to.feather(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_feather\",\n", |
| " file=\"./df.feather\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to a json file\n", |
| " to.json(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_json\",\n", |
| " file=\"./df.json\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| " # materialize the dataframe to an avro file\n", |
| " to.avro(\n", |
| " dependencies=output_columns,\n", |
| " id=\"df_to_avro\",\n", |
| " file=\"./df.avro\",\n", |
| " combine=df_builder,\n", |
| " ),\n", |
| "]\n", |
| "# Visualize what is happening\n", |
| "dr.visualize_materialization(\n", |
| " *materializers,\n", |
| " additional_vars=output_columns,\n", |
| " inputs=initial_columns,\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 11, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:56.565384Z", |
| "start_time": "2023-10-05T16:36:56.531421Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# Materialize a result, i.e. execute the DAG!\n", |
| "materialization_results, additional_outputs = dr.materialize(\n", |
| " *materializers,\n", |
| " additional_vars=[\n", |
| " \"df_to_parquet_build_result\",\n", |
| " \"df_to_feather_build_result\",\n", |
| " \"df_to_json_build_result\",\n", |
| " \"df_to_avro_build_result\",\n", |
| " ], # because combine is used, we can get that result here.\n", |
| " inputs=initial_columns,\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 12, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:57.122715Z", |
| "start_time": "2023-10-05T16:36:57.101770Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "{'df_to_parquet': {'size': 1605,\n", |
| " 'path': './df.parquet',\n", |
| " 'last_modified': 1696820306.5002773,\n", |
| " 'timestamp': 1696800506.500361},\n", |
| " 'df_to_feather': {'size': 1696,\n", |
| " 'path': './df.feather',\n", |
| " 'last_modified': 1696820306.5006804,\n", |
| " 'timestamp': 1696800506.500732},\n", |
| " 'df_to_json': {'size': 657,\n", |
| " 'path': './df.json',\n", |
| " 'last_modified': 1696820306.5011163,\n", |
| " 'timestamp': 1696800506.501182},\n", |
| " 'df_to_avro': {'size': 517,\n", |
| " 'path': './df.avro',\n", |
| " 'last_modified': 1696820306.5013201,\n", |
| " 'timestamp': 1696800506.501356}}" |
| ] |
| }, |
| "execution_count": 12, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "materialization_results" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 13, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:57.826266Z", |
| "start_time": "2023-10-05T16:36:57.805459Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/html": [ |
| "<div><style>\n", |
| ".dataframe > thead > tr > th,\n", |
| ".dataframe > tbody > tr > td {\n", |
| " text-align: right;\n", |
| " white-space: pre-wrap;\n", |
| "}\n", |
| "</style>\n", |
| "<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>" |
| ], |
| "text/plain": [ |
| "shape: (6, 5)\n", |
| "┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n", |
| "│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n", |
| "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", |
| "│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n", |
| "╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n", |
| "│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n", |
| "│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n", |
| "│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n", |
| "│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n", |
| "│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n", |
| "│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n", |
| "└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘" |
| ] |
| }, |
| "execution_count": 13, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "additional_outputs[\"df_to_parquet_build_result\"]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 14, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:36:58.919203Z", |
| "start_time": "2023-10-05T16:36:58.869761Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/html": [ |
| "<div><style>\n", |
| ".dataframe > thead > tr > th,\n", |
| ".dataframe > tbody > tr > td {\n", |
| " text-align: right;\n", |
| " white-space: pre-wrap;\n", |
| "}\n", |
| "</style>\n", |
| "<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>" |
| ], |
| "text/plain": [ |
| "shape: (6, 5)\n", |
| "┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n", |
| "│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n", |
| "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", |
| "│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n", |
| "╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n", |
| "│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n", |
| "│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n", |
| "│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n", |
| "│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n", |
| "│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n", |
| "│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n", |
| "└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘" |
| ] |
| }, |
| "execution_count": 14, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "additional_outputs[\"df_to_feather_build_result\"]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 15, |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2023-10-05T16:37:01.766669Z", |
| "start_time": "2023-10-05T16:37:01.705286Z" |
| }, |
| "collapsed": false |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/html": [ |
| "<div><style>\n", |
| ".dataframe > thead > tr > th,\n", |
| ".dataframe > tbody > tr > td {\n", |
| " text-align: right;\n", |
| " white-space: pre-wrap;\n", |
| "}\n", |
| "</style>\n", |
| "<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>" |
| ], |
| "text/plain": [ |
| "shape: (6, 5)\n", |
| "┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n", |
| "│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n", |
| "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", |
| "│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n", |
| "╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n", |
| "│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n", |
| "│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n", |
| "│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n", |
| "│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n", |
| "│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n", |
| "│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n", |
| "└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘" |
| ] |
| }, |
| "execution_count": 15, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "additional_outputs[\"df_to_json_build_result\"]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 16, |
| "metadata": { |
| "collapsed": false |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "text/html": [ |
| "<div><style>\n", |
| ".dataframe > thead > tr > th,\n", |
| ".dataframe > tbody > tr > td {\n", |
| " text-align: right;\n", |
| " white-space: pre-wrap;\n", |
| "}\n", |
| "</style>\n", |
| "<small>shape: (6, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>spend</th><th>signups</th><th>avg_3wk_spend</th><th>spend_per_signup</th><th>spend_zero_mean_unit_variance</th></tr><tr><td>i64</td><td>i64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>10</td><td>1</td><td>null</td><td>10.0</td><td>-1.064405</td></tr><tr><td>10</td><td>10</td><td>null</td><td>1.0</td><td>-1.064405</td></tr><tr><td>20</td><td>50</td><td>13.333333</td><td>0.4</td><td>-0.483821</td></tr><tr><td>40</td><td>100</td><td>23.333333</td><td>0.4</td><td>0.677349</td></tr><tr><td>40</td><td>200</td><td>33.333333</td><td>0.2</td><td>0.677349</td></tr><tr><td>50</td><td>400</td><td>43.333333</td><td>0.125</td><td>1.257934</td></tr></tbody></table></div>" |
| ], |
| "text/plain": [ |
| "shape: (6, 5)\n", |
| "┌───────┬─────────┬───────────────┬──────────────────┬───────────────────────────────┐\n", |
| "│ spend ┆ signups ┆ avg_3wk_spend ┆ spend_per_signup ┆ spend_zero_mean_unit_variance │\n", |
| "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", |
| "│ i64 ┆ i64 ┆ f64 ┆ f64 ┆ f64 │\n", |
| "╞═══════╪═════════╪═══════════════╪══════════════════╪═══════════════════════════════╡\n", |
| "│ 10 ┆ 1 ┆ null ┆ 10.0 ┆ -1.064405 │\n", |
| "│ 10 ┆ 10 ┆ null ┆ 1.0 ┆ -1.064405 │\n", |
| "│ 20 ┆ 50 ┆ 13.333333 ┆ 0.4 ┆ -0.483821 │\n", |
| "│ 40 ┆ 100 ┆ 23.333333 ┆ 0.4 ┆ 0.677349 │\n", |
| "│ 40 ┆ 200 ┆ 33.333333 ┆ 0.2 ┆ 0.677349 │\n", |
| "│ 50 ┆ 400 ┆ 43.333333 ┆ 0.125 ┆ 1.257934 │\n", |
| "└───────┴─────────┴───────────────┴──────────────────┴───────────────────────────────┘" |
| ] |
| }, |
| "execution_count": 16, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "additional_outputs[\"df_to_avro_build_result\"]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3 (ipykernel)", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.10.13" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 4 |
| } |