| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| } |
| }, |
| "source": [ |
| "Uncomment and run the cell below if you are in a Google Colab environment. It will:\n", |
| "1. Mount Google Drive. You will be asked to authenticate and grant permissions.\n", |
| "2. Change directory to Google Drive.\n", |
| "3. Make a directory named \"hamilton-tutorials\".\n", |
| "4. Change directory to it.\n", |
| "5. Clone this repository to your Google Drive.\n", |
| "6. Change your current directory to the hello_world example.\n", |
| "7. Install requirements.\n", |
| "\n", |
| "This means that any modifications will be saved, and you won't lose them if you close your browser." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "## 1. Mount google drive\n", |
| "# from google.colab import drive\n", |
| "# drive.mount('/content/drive')\n", |
| "## 2. Change directory to google drive.\n", |
| "# %cd /content/drive/MyDrive\n", |
| "## 3. Make a directory \"hamilton-tutorials\"\n", |
| "# !mkdir hamilton-tutorials\n", |
| "## 4. Change directory to it.\n", |
| "# %cd hamilton-tutorials\n", |
| "## 5. Clone this repository to your google drive\n", |
| "# !git clone https://github.com/DAGWorks-Inc/hamilton/\n", |
| "## 6. Move your current directory to the hello_world example\n", |
| "# %cd hamilton/examples/hello_world\n", |
| "## 7. Install requirements.\n", |
| "# %pip install -r requirements.txt\n", |
| "# from IPython.display import clear_output; clear_output() # optionally clear outputs\n", |
| "# To check your current working directory you can type `!pwd` in a cell and run it." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# Enable automatic module reloading.\n", |
| "# We use the autoreload extension that comes with IPython to automatically reload modules when\n", |
| "# the code in them changes.\n", |
| "\n", |
| "# import the jupyter extension\n", |
| "%load_ext autoreload\n", |
| "# mode 1: reload only the modules that are registered with %aimport\n", |
| "%autoreload 1" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# Import modules\n", |
| "\n", |
| "import pandas as pd\n", |
| "from dask import dataframe\n", |
| "from dask.distributed import (\n", |
| " Client,\n", |
| " LocalCluster,\n", |
| ")\n", |
| "\n", |
| "from hamilton import driver\n", |
| "from hamilton.plugins import h_dask" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Overwriting data_loaders.py\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%%writefile data_loaders.py\n", |
| "\n", |
| "import pandas as pd\n", |
| "from dask import dataframe\n", |
| "\n", |
| "# We'll place the data loaders into a new module\n", |
| "\n", |
| "def spend(spend_location: str, spend_partitions: int) -> dataframe.Series:\n", |
| " \"\"\"Dummy function showing how to wire through loading data.\n", |
| "\n", |
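| " :param spend_location: where the spend data would be loaded from; unused here because the values are hard-coded\n", |
| " :param spend_partitions: number of partitions to segment the data into\n", |
| " :return: a dask Series built from the hard-coded spend values\n", |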
| " \"\"\"\n", |
| " return dataframe.from_pandas(\n", |
| " pd.Series([10, 10, 20, 40, 40, 50]), name=\"spend\", npartitions=spend_partitions\n", |
| " )\n", |
| "\n", |
| "\n", |
| "def signups(signups_location: str, signups_partitions: int) -> dataframe.Series:\n", |
| " \"\"\"Dummy function showing how to wire through loading data.\n", |
| "\n", |
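| " :param signups_location: where the signups data would be loaded from; unused here because the values are hard-coded\n", |
| " :param signups_partitions: number of partitions to segment the data into\n", |
| " :return: a dask Series built from the hard-coded signups values\n", |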
| " \"\"\"\n", |
| " return dataframe.from_pandas(\n", |
| " pd.Series([1, 10, 50, 100, 200, 400]), name=\"signups\", npartitions=signups_partitions\n", |
| " )" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Overwriting spend_calculations.py\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%%writefile spend_calculations.py\n", |
| "\n", |
| "# We'll place the spend calculations into a new module\n", |
| "\n", |
| "import pandas as pd\n", |
| "\n", |
| "def avg_3wk_spend(spend: pd.Series) -> pd.Series:\n", |
| " \"\"\"Rolling 3 week average spend.\"\"\"\n", |
| " return spend.rolling(3).mean()\n", |
| "\n", |
| "\n", |
| "def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:\n", |
| " \"\"\"The cost per signup in relation to spend.\"\"\"\n", |
| " return spend / signups\n", |
| "\n", |
| "\n", |
| "def spend_mean(spend: pd.Series) -> float:\n", |
| " \"\"\"Shows function creating a scalar. In this case it computes the mean of the entire column.\"\"\"\n", |
| " return spend.mean()\n", |
| "\n", |
| "\n", |
| "def spend_zero_mean(spend: pd.Series, spend_mean: float) -> pd.Series:\n", |
| " \"\"\"Shows function that takes a scalar. In this case, it subtracts the mean so that spend has zero mean.\"\"\"\n", |
| " return spend - spend_mean\n", |
| "\n", |
| "\n", |
| "def spend_std_dev(spend: pd.Series) -> float:\n", |
| " \"\"\"Function that computes the standard deviation of the spend column.\"\"\"\n", |
| " return spend.std()\n", |
| "\n", |
| "\n", |
| "def spend_zero_mean_unit_variance(spend_zero_mean: pd.Series, spend_std_dev: float) -> pd.Series:\n", |
| " \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n", |
| " return spend_zero_mean / spend_std_dev" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n" |
| ] |
| }, |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "LocalCluster(94f8c394, 'tcp://127.0.0.1:50070', workers=4, threads=8, memory=16.00 GiB)\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%aimport data_loaders, spend_calculations\n", |
| "\n", |
| "# Set up the local Dask cluster, adapter, and driver.\n", |
| "\n", |
| "cluster = LocalCluster()\n", |
| "client = Client(cluster)\n", |
| "\n", |
| "print(client.cluster)\n", |
| "\n", |
| "adapter = h_dask.DaskGraphAdapter(\n", |
| " client,\n", |
| " h_dask.DaskDataFrameResult(),\n", |
| " visualize_kwargs={\"filename\": \"run_dask.png\", \"format\": \"png\"},\n", |
| " use_delayed=False,\n", |
| " compute_at_end=False,\n", |
| ")\n", |
| "\n", |
| "config = {\n", |
| " \"spend_location\": \"some file path\",\n", |
| " \"spend_partitions\": 2,\n", |
| " \"signups_location\": \"some file path\",\n", |
| " \"signups_partitions\": 2,\n", |
| " \"foobar\": \"some_other_data\",\n", |
| "}\n", |
| "\n", |
| "dr = driver.Driver(config, spend_calculations, data_loaders, adapter=adapter)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 8.1.0 (20230707.0739)\n", |
| " -->\n", |
| "<!-- Pages: 1 -->\n", |
| "<svg width=\"945pt\" height=\"332pt\"\n", |
| " viewBox=\"0.00 0.00 945.02 332.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 328)\">\n", |
| "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-328 941.02,-328 941.02,4 -4,4\"/>\n", |
| "<!-- signups -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>signups</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"538.02,-252 480.77,-252 480.77,-216 538.02,-216 538.02,-252\"/>\n", |
| "<text text-anchor=\"middle\" x=\"509.39\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">signups</text>\n", |
| "</g>\n", |
| "<!-- spend_per_signup -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>spend_per_signup</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"556.52,-180 442.27,-180 442.27,-144 556.52,-144 556.52,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"499.39\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_per_signup</text>\n", |
| "</g>\n", |
| "<!-- signups->spend_per_signup -->\n", |
| "<g id=\"edge9\" class=\"edge\">\n", |
| "<title>signups->spend_per_signup</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M506.92,-215.7C505.85,-208.24 504.58,-199.32 503.39,-190.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"506.71,-190.51 501.83,-181.1 499.78,-191.5 506.71,-190.51\"/>\n", |
| "</g>\n", |
| "<!-- spend_partitions -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>spend_partitions</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"96.39\" cy=\"-306\" rx=\"96.39\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"96.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend_partitions</text>\n", |
| "</g>\n", |
| "<!-- spend -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>spend</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"329.39,-252 275.39,-252 275.39,-216 329.39,-216 329.39,-252\"/>\n", |
| "<text text-anchor=\"middle\" x=\"302.39\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend</text>\n", |
| "</g>\n", |
| "<!-- spend_partitions->spend -->\n", |
| "<g id=\"edge7\" class=\"edge\">\n", |
| "<title>spend_partitions->spend</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M141.6,-289.64C178.52,-277.09 230.09,-259.57 264.79,-247.78\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"265.63,-250.85 273.98,-244.32 263.38,-244.22 265.63,-250.85\"/>\n", |
| "</g>\n", |
| "<!-- spend_std_dev -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>spend_std_dev</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"131.39\" cy=\"-90\" rx=\"65.68\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"131.39\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_std_dev</text>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean_unit_variance -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>spend_zero_mean_unit_variance</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"309.64,-36 115.14,-36 115.14,0 309.64,0 309.64,-36\"/>\n", |
| "<text text-anchor=\"middle\" x=\"212.39\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean_unit_variance</text>\n", |
| "</g>\n", |
| "<!-- spend_std_dev->spend_zero_mean_unit_variance -->\n", |
| "<g id=\"edge5\" class=\"edge\">\n", |
| "<title>spend_std_dev->spend_zero_mean_unit_variance</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M150.59,-72.41C160.58,-63.78 172.99,-53.05 184.06,-43.48\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"185.92,-45.63 191.2,-36.45 181.35,-40.34 185.92,-45.63\"/>\n", |
| "</g>\n", |
| "<!-- spend->spend_std_dev -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>spend->spend_std_dev</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M274.9,-231.33C240.54,-227.72 182.37,-216.26 150.39,-180 135.78,-163.44 131.46,-138.43 130.55,-119.13\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"134.03,-119.2 130.36,-109.26 127.04,-119.32 134.03,-119.2\"/>\n", |
| "</g>\n", |
| "<!-- spend->spend_per_signup -->\n", |
| "<g id=\"edge8\" class=\"edge\">\n", |
| "<title>spend->spend_per_signup</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M329.78,-223.27C358.19,-213.17 403.51,-197.07 440.15,-184.05\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"440.98,-187.11 449.23,-180.47 438.64,-180.52 440.98,-187.11\"/>\n", |
| "</g>\n", |
| "<!-- spend_mean -->\n", |
| "<g id=\"node9\" class=\"node\">\n", |
| "<title>spend_mean</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"243.52,-180 159.27,-180 159.27,-144 243.52,-144 243.52,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"201.39\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_mean</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_mean -->\n", |
| "<g id=\"edge10\" class=\"edge\">\n", |
| "<title>spend->spend_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M277.16,-215.52C264.42,-206.68 248.77,-195.84 235.01,-186.3\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"237.51,-183.08 227.3,-180.26 233.52,-188.83 237.51,-183.08\"/>\n", |
| "</g>\n", |
| "<!-- avg_3wk_spend -->\n", |
| "<g id=\"node11\" class=\"node\">\n", |
| "<title>avg_3wk_spend</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" points=\"365.27,-180 261.52,-180 261.52,-144 365.27,-144 365.27,-180\"/>\n", |
| "<text text-anchor=\"middle\" x=\"313.39\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">avg_3wk_spend</text>\n", |
| "</g>\n", |
| "<!-- spend->avg_3wk_spend -->\n", |
| "<g id=\"edge11\" class=\"edge\">\n", |
| "<title>spend->avg_3wk_spend</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M305.11,-215.7C306.28,-208.24 307.68,-199.32 309,-190.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"312.61,-191.53 310.7,-181.1 305.69,-190.44 312.61,-191.53\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean -->\n", |
| "<g id=\"node13\" class=\"node\">\n", |
| "<title>spend_zero_mean</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" cx=\"293.39\" cy=\"-90\" rx=\"77.97\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"293.39\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean</text>\n", |
| "</g>\n", |
| "<!-- spend->spend_zero_mean -->\n", |
| "<g id=\"edge12\" class=\"edge\">\n", |
| "<title>spend->spend_zero_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M329.83,-220.81C346.01,-212.01 365.08,-198.42 374.39,-180 381.61,-165.72 382,-158.07 374.39,-144 366.92,-130.19 354.11,-119.4 340.86,-111.26\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"343.1,-107.94 332.67,-106.05 339.64,-114.03 343.1,-107.94\"/>\n", |
| "</g>\n", |
| "<!-- signups_location -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>signups_location</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"509.39\" cy=\"-306\" rx=\"97.93\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"509.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups_location</text>\n", |
| "</g>\n", |
| "<!-- signups_location->signups -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>signups_location->signups</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M509.39,-287.7C509.39,-280.24 509.39,-271.32 509.39,-262.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"512.89,-263.1 509.39,-253.1 505.89,-263.1 512.89,-263.1\"/>\n", |
| "</g>\n", |
| "<!-- foobar -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>foobar</title>\n", |
| "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"937.02,-324 849.77,-324 849.77,-288 937.02,-288 937.02,-324\"/>\n", |
| "<text text-anchor=\"middle\" x=\"893.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: foobar</text>\n", |
| "</g>\n", |
| "<!-- spend_mean->spend_zero_mean -->\n", |
| "<g id=\"edge13\" class=\"edge\">\n", |
| "<title>spend_mean->spend_zero_mean</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M224.13,-143.7C235.89,-134.75 250.4,-123.71 263.1,-114.05\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"264.71,-116.46 270.54,-107.62 260.47,-110.89 264.71,-116.46\"/>\n", |
| "</g>\n", |
| "<!-- spend_location -->\n", |
| "<g id=\"node10\" class=\"node\">\n", |
| "<title>spend_location</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"302.39\" cy=\"-306\" rx=\"91.27\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"302.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend_location</text>\n", |
| "</g>\n", |
| "<!-- spend_location->spend -->\n", |
| "<g id=\"edge6\" class=\"edge\">\n", |
| "<title>spend_location->spend</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M302.39,-287.7C302.39,-280.24 302.39,-271.32 302.39,-262.97\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"305.89,-263.1 302.39,-253.1 298.89,-263.1 305.89,-263.1\"/>\n", |
| "</g>\n", |
| "<!-- signups_partitions -->\n", |
| "<g id=\"node12\" class=\"node\">\n", |
| "<title>signups_partitions</title>\n", |
| "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"728.39\" cy=\"-306\" rx=\"103.04\" ry=\"18\"/>\n", |
| "<text text-anchor=\"middle\" x=\"728.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups_partitions</text>\n", |
| "</g>\n", |
| "<!-- signups_partitions->signups -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>signups_partitions->signups</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M680.33,-289.64C640.79,-277 585.46,-259.31 548.57,-247.52\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"549.81,-243.93 539.22,-244.22 547.68,-250.59 549.81,-243.93\"/>\n", |
| "</g>\n", |
| "<!-- spend_zero_mean->spend_zero_mean_unit_variance -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>spend_zero_mean->spend_zero_mean_unit_variance</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M274.19,-72.41C264.2,-63.78 251.79,-53.05 240.72,-43.48\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"243.43,-40.34 233.58,-36.45 238.86,-45.63 243.43,-40.34\"/>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x11b250490>" |
| ] |
| }, |
| "execution_count": 7, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Execute the driver.\n", |
| "\n", |
| "output_columns = [\n", |
| " \"spend\",\n", |
| " \"signups\",\n", |
| " \"avg_3wk_spend\",\n", |
| " \"spend_per_signup\",\n", |
| " \"spend_mean\",\n", |
| " \"spend_zero_mean_unit_variance\",\n", |
| " \"foobar\",\n", |
| "]\n", |
| "\n", |
| "dask_df = dr.execute(output_columns) # it's dask dataframe -- it hasn't been evaluated yet.\n", |
| "df = dask_df.compute()\n", |
| "\n", |
| "# To visualize the execution graph, first run `pip install \"sf-hamilton[visualization]\"`.\n", |
| "dr.visualize_execution(output_columns)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 9, |
| "metadata": { |
| "collapsed": false, |
| "jupyter": { |
| "outputs_hidden": false |
| }, |
| "pycharm": { |
| "name": "#%%\n" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " spend signups avg_3wk_spend spend_per_signup spend_mean spend_zero_mean_unit_variance foobar\n", |
| "0 10 1 NaN 10.000 28.333333 -1.064405 some_other_data\n", |
| "1 10 10 NaN 1.000 28.333333 -1.064405 some_other_data\n", |
| "2 20 50 13.333333 0.400 28.333333 -0.483821 some_other_data\n", |
| "3 40 100 23.333333 0.400 28.333333 0.677349 some_other_data\n", |
| "4 40 200 33.333333 0.200 28.333333 0.677349 some_other_data\n", |
| "5 50 400 43.333333 0.125 28.333333 1.257934 some_other_data\n" |
| ] |
| } |
| ], |
| "source": [ |
| "print(df.to_string())\n", |
| "client.shutdown()" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3 (ipykernel)", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.11.5" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 4 |
| } |