| { |
| "cells": [ |
| { |
| "metadata": {}, |
| "cell_type": "code", |
| "outputs": [], |
| "execution_count": null, |
| "source": "!pip install pandas scikit-learn \"sf-hamilton[visualization,ui,sdk]\"", |
| "id": "8480733a5c6dbbe5" |
| }, |
| { |
| "metadata": {}, |
| "cell_type": "code", |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "/Users/stefankrawczyk/.pyenv/versions/knowledge_retrieval-py39/lib/python3.9/site-packages/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.\n", |
| " warnings.warn(\n" |
| ] |
| } |
| ], |
| "execution_count": 1, |
| "source": "%load_ext hamilton.plugins.jupyter_magic", |
| "id": "initial_id" |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "id": "efd6c1b2417bb9cf", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-07-22T18:12:54.548060Z", |
| "start_time": "2024-07-22T18:12:53.934926Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 10.0.1 (20240210.2158)\n", |
| " -->\n", |
| "<!-- Pages: 1 -->\n", |
| "<svg width=\"547pt\" height=\"303pt\"\n", |
| " viewBox=\"0.00 0.00 546.65 302.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 298.73)\">\n", |
| "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-298.73 542.65,-298.73 542.65,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.5,-97.72 8.5,-286.73 116.6,-286.73 116.6,-97.72 8.5,-97.72\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-269.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- raw_data -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>raw_data</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M231.95,-75.52C231.95,-75.52 166.1,-75.52 166.1,-75.52 160.1,-75.52 154.1,-69.52 154.1,-63.52 154.1,-63.52 154.1,-23.92 154.1,-23.92 154.1,-17.92 160.1,-11.93 166.1,-11.93 166.1,-11.93 231.95,-11.93 231.95,-11.93 237.95,-11.93 243.95,-17.93 243.95,-23.93 243.95,-23.93 243.95,-63.53 243.95,-63.53 243.95,-69.52 237.95,-75.52 231.95,-75.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"169.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n", |
| "<text text-anchor=\"start\" x=\"164.9\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- transformed_data -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>transformed_data</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M397.3,-75.52C397.3,-75.52 284.95,-75.52 284.95,-75.52 278.95,-75.52 272.95,-69.52 272.95,-63.52 272.95,-63.52 272.95,-23.92 272.95,-23.92 272.95,-17.92 278.95,-11.93 284.95,-11.93 284.95,-11.93 397.3,-11.93 397.3,-11.93 403.3,-11.93 409.3,-17.93 409.3,-23.93 409.3,-23.93 409.3,-63.53 409.3,-63.53 409.3,-69.52 403.3,-75.52 397.3,-75.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"283.75\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n", |
| "<text text-anchor=\"start\" x=\"307\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- raw_data->transformed_data -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>raw_data->transformed_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M244.35,-43.72C249.82,-43.72 255.54,-43.72 261.33,-43.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"261.02,-47.23 271.02,-43.73 261.02,-40.23 261.02,-47.23\"/>\n", |
| "</g>\n", |
| "<!-- saved_data -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>saved_data</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-119.89 516.16,-123.45 488.47,-123.45 460.79,-123.45 438.3,-119.89 438.3,-115.5 438.3,-115.5 438.3,-43.95 438.3,-43.95 438.3,-39.56 460.79,-36 488.47,-36 516.16,-36 538.65,-39.56 538.65,-43.95 538.65,-43.95 538.65,-115.5 538.65,-115.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-111.11 516.16,-107.55 488.47,-107.55 460.79,-107.55 438.3,-111.11 438.3,-115.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"451.72\" y=\"-88.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_data</text>\n", |
| "<text text-anchor=\"start\" x=\"449.1\" y=\"-60.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_data()</text>\n", |
| "</g>\n", |
| "<!-- transformed_data->saved_data -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>transformed_data->saved_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M409.69,-60.46C415.48,-61.89 421.3,-63.34 427.02,-64.75\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"425.84,-68.07 436.39,-67.07 427.53,-61.27 425.84,-68.07\"/>\n", |
| "</g>\n", |
| "<!-- raw_data.loader -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>raw_data.loader</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-83.89 97.06,-87.45 62.55,-87.45 28.04,-87.45 0,-83.89 0,-79.5 0,-79.5 0,-7.95 0,-7.95 0,-3.56 28.04,0 62.55,0 97.06,0 125.1,-3.56 125.1,-7.95 125.1,-7.95 125.1,-79.5 125.1,-79.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-75.11 97.06,-71.55 62.55,-71.55 28.04,-71.55 0,-75.11 0,-79.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"10.8\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data.loader</text>\n", |
| "<text text-anchor=\"start\" x=\"30.3\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">raw_data()</text>\n", |
| "</g>\n", |
| "<!-- raw_data.loader->raw_data -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>raw_data.loader->raw_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M125.28,-43.72C131.02,-43.72 136.81,-43.72 142.49,-43.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"142.42,-47.23 152.42,-43.73 142.42,-40.23 142.42,-47.23\"/>\n", |
| "</g>\n", |
| "<!-- _saved_data_inputs -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>_saved_data_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"388.18,-138.02 294.07,-138.02 294.07,-93.42 388.18,-93.42 388.18,-138.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"308.88\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">filepath</text>\n", |
| "<text text-anchor=\"start\" x=\"358.38\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _saved_data_inputs->saved_data -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>_saved_data_inputs->saved_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M388.51,-104.24C400.72,-101.21 414.08,-97.9 426.91,-94.73\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"427.68,-98.14 436.55,-92.34 426,-91.35 427.68,-98.14\"/>\n", |
| "</g>\n", |
| "<!-- input -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>input</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.55,-142.02 35.55,-142.02 35.55,-105.42 89.55,-105.42 89.55,-142.02\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-117.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M84.97,-197.02C84.97,-197.02 40.12,-197.02 40.12,-197.02 34.12,-197.02 28.12,-191.02 28.12,-185.02 28.12,-185.02 28.12,-172.43 28.12,-172.43 28.12,-166.43 34.12,-160.43 40.12,-160.43 40.12,-160.43 84.97,-160.43 84.97,-160.43 90.97,-160.43 96.97,-166.43 96.97,-172.43 96.97,-172.43 96.97,-185.02 96.97,-185.02 96.97,-191.02 90.97,-197.02 84.97,-197.02\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-172.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "<!-- materializer -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>materializer</title>\n", |
| "<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-254.29 87.96,-255.94 62.55,-255.94 37.14,-255.94 16.5,-254.29 16.5,-252.26 16.5,-252.26 16.5,-219.19 16.5,-219.19 16.5,-217.16 37.14,-215.51 62.55,-215.51 87.96,-215.51 108.6,-217.16 108.6,-219.19 108.6,-219.19 108.6,-252.26 108.6,-252.26\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-250.23 87.96,-248.59 62.55,-248.59 37.14,-248.59 16.5,-250.23 16.5,-252.26\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-229.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x1536c0550>" |
| ] |
| }, |
| "metadata": {}, |
| "output_type": "display_data" |
| } |
| ], |
| "source": [ |
| "%%cell_to_module simple_etl --display\n", |
| "import pandas as pd\n", |
| "from sklearn import datasets\n", |
| "from hamilton.function_modifiers import dataloader, datasaver\n", |
| "from hamilton.io import utils as io_utils\n", |
| "\n", |
| "\n", |
| "@dataloader()\n", |
| "def raw_data() -> tuple[pd.DataFrame, dict]:\n", |
| " data = datasets.load_digits()\n", |
| " df = pd.DataFrame(data.data, columns=[f\"feature_{i}\" for i in range(data.data.shape[1])])\n", |
| " metadata = io_utils.get_dataframe_metadata(df)\n", |
| " return df, metadata\n", |
| "\n", |
| "\n", |
| "def transformed_data(raw_data: pd.DataFrame) -> pd.DataFrame:\n", |
| " return raw_data\n", |
| "\n", |
| "\n", |
| "@datasaver()\n", |
| "def saved_data(transformed_data: pd.DataFrame, filepath: str) -> dict:\n", |
| " transformed_data.to_csv(filepath)\n", |
| " metadata = io_utils.get_file_and_dataframe_metadata(filepath, transformed_data)\n", |
| " return metadata\n", |
| "\n" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "id": "e9252f2a09228330", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-06-25T00:00:37.889540Z", |
| "start_time": "2024-06-25T00:00:35.994131Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 10.0.1 (20240210.2158)\n", |
| " -->\n", |
| "<!-- Pages: 1 -->\n", |
| "<svg width=\"547pt\" height=\"303pt\"\n", |
| " viewBox=\"0.00 0.00 546.65 302.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 298.73)\">\n", |
| "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-298.73 542.65,-298.73 542.65,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.5,-97.72 8.5,-286.73 116.6,-286.73 116.6,-97.72 8.5,-97.72\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-269.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- raw_data -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>raw_data</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M231.95,-75.52C231.95,-75.52 166.1,-75.52 166.1,-75.52 160.1,-75.52 154.1,-69.52 154.1,-63.52 154.1,-63.52 154.1,-23.92 154.1,-23.92 154.1,-17.92 160.1,-11.93 166.1,-11.93 166.1,-11.93 231.95,-11.93 231.95,-11.93 237.95,-11.93 243.95,-17.93 243.95,-23.93 243.95,-23.93 243.95,-63.53 243.95,-63.53 243.95,-69.52 237.95,-75.52 231.95,-75.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"169.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n", |
| "<text text-anchor=\"start\" x=\"164.9\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- transformed_data -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>transformed_data</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M397.3,-75.52C397.3,-75.52 284.95,-75.52 284.95,-75.52 278.95,-75.52 272.95,-69.52 272.95,-63.52 272.95,-63.52 272.95,-23.92 272.95,-23.92 272.95,-17.92 278.95,-11.93 284.95,-11.93 284.95,-11.93 397.3,-11.93 397.3,-11.93 403.3,-11.93 409.3,-17.93 409.3,-23.93 409.3,-23.93 409.3,-63.53 409.3,-63.53 409.3,-69.52 403.3,-75.52 397.3,-75.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"283.75\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n", |
| "<text text-anchor=\"start\" x=\"307\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- raw_data->transformed_data -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>raw_data->transformed_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M244.35,-43.72C249.82,-43.72 255.54,-43.72 261.33,-43.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"261.02,-47.23 271.02,-43.73 261.02,-40.23 261.02,-47.23\"/>\n", |
| "</g>\n", |
| "<!-- saved_data -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>saved_data</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-119.89 516.16,-123.45 488.47,-123.45 460.79,-123.45 438.3,-119.89 438.3,-115.5 438.3,-115.5 438.3,-43.95 438.3,-43.95 438.3,-39.56 460.79,-36 488.47,-36 516.16,-36 538.65,-39.56 538.65,-43.95 538.65,-43.95 538.65,-115.5 538.65,-115.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-111.11 516.16,-107.55 488.47,-107.55 460.79,-107.55 438.3,-111.11 438.3,-115.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"451.72\" y=\"-88.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_data</text>\n", |
| "<text text-anchor=\"start\" x=\"449.1\" y=\"-60.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_data()</text>\n", |
| "</g>\n", |
| "<!-- transformed_data->saved_data -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>transformed_data->saved_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M409.69,-60.46C415.48,-61.89 421.3,-63.34 427.02,-64.75\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"425.84,-68.07 436.39,-67.07 427.53,-61.27 425.84,-68.07\"/>\n", |
| "</g>\n", |
| "<!-- raw_data.loader -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>raw_data.loader</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-83.89 97.06,-87.45 62.55,-87.45 28.04,-87.45 0,-83.89 0,-79.5 0,-79.5 0,-7.95 0,-7.95 0,-3.56 28.04,0 62.55,0 97.06,0 125.1,-3.56 125.1,-7.95 125.1,-7.95 125.1,-79.5 125.1,-79.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-75.11 97.06,-71.55 62.55,-71.55 28.04,-71.55 0,-75.11 0,-79.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"10.8\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data.loader</text>\n", |
| "<text text-anchor=\"start\" x=\"30.3\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">raw_data()</text>\n", |
| "</g>\n", |
| "<!-- raw_data.loader->raw_data -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>raw_data.loader->raw_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M125.28,-43.72C131.02,-43.72 136.81,-43.72 142.49,-43.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"142.42,-47.23 152.42,-43.73 142.42,-40.23 142.42,-47.23\"/>\n", |
| "</g>\n", |
| "<!-- _saved_data_inputs -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>_saved_data_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"388.18,-138.02 294.07,-138.02 294.07,-93.42 388.18,-93.42 388.18,-138.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"308.88\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">filepath</text>\n", |
| "<text text-anchor=\"start\" x=\"358.38\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _saved_data_inputs->saved_data -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>_saved_data_inputs->saved_data</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M388.51,-104.24C400.72,-101.21 414.08,-97.9 426.91,-94.73\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"427.68,-98.14 436.55,-92.34 426,-91.35 427.68,-98.14\"/>\n", |
| "</g>\n", |
| "<!-- input -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>input</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.55,-142.02 35.55,-142.02 35.55,-105.42 89.55,-105.42 89.55,-142.02\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-117.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M84.97,-197.02C84.97,-197.02 40.12,-197.02 40.12,-197.02 34.12,-197.02 28.12,-191.02 28.12,-185.02 28.12,-185.02 28.12,-172.43 28.12,-172.43 28.12,-166.43 34.12,-160.43 40.12,-160.43 40.12,-160.43 84.97,-160.43 84.97,-160.43 90.97,-160.43 96.97,-166.43 96.97,-172.43 96.97,-172.43 96.97,-185.02 96.97,-185.02 96.97,-191.02 90.97,-197.02 84.97,-197.02\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-172.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "<!-- materializer -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>materializer</title>\n", |
| "<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-254.29 87.96,-255.94 62.55,-255.94 37.14,-255.94 16.5,-254.29 16.5,-252.26 16.5,-252.26 16.5,-219.19 16.5,-219.19 16.5,-217.16 37.14,-215.51 62.55,-215.51 87.96,-215.51 108.6,-217.16 108.6,-219.19 108.6,-219.19 108.6,-252.26 108.6,-252.26\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-250.23 87.96,-248.59 62.55,-248.59 37.14,-248.59 16.5,-250.23 16.5,-252.26\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.55\" y=\"-229.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x153c29040>" |
| ] |
| }, |
| "execution_count": 3, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "from hamilton_sdk import adapters\n", |
| "\n", |
| "from hamilton import driver\n", |
| "\n", |
| "tracker = adapters.HamiltonTracker(\n", |
| " project_id=7, # modify this as needed\n", |
| " username=\"elijah@dagworks.io\", # modify this as needed\n", |
| " dag_name=\"my_version_of_the_dag\",\n", |
| " tags={\"environment\": \"DEV\", \"team\": \"MY_TEAM\", \"version\": \"X\"},\n", |
| ")\n", |
| "dr = driver.Builder().with_config({}).with_modules(simple_etl).with_adapters(tracker).build()\n", |
| "dr.display_all_functions()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "id": "86c0d0f7da9a472b", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-06-25T00:00:53.746596Z", |
| "start_time": "2024-06-25T00:00:52.320439Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "\n", |
| "Capturing execution run. Results can be found at http://localhost:8241/dashboard/project/7/runs/84\n", |
| "\n", |
| "\n", |
| "Captured execution run. Results can be found at http://localhost:8241/dashboard/project/7/runs/84\n", |
| "\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/plain": [ |
| "{'saved_data': {'file_metadata': {'size': 499704,\n", |
| " 'path': 'data.csv',\n", |
| " 'last_modified': 1721672006.6388159,\n", |
| " 'timestamp': 1721697206.638992,\n", |
| " 'scheme': '',\n", |
| " 'notes': ''},\n", |
| " 'dataframe_metadata': {'rows': 1797,\n", |
| " 'columns': 64,\n", |
| " 'column_names': ['feature_0',\n", |
| " 'feature_1',\n", |
| " 'feature_2',\n", |
| " 'feature_3',\n", |
| " 'feature_4',\n", |
| " 'feature_5',\n", |
| " 'feature_6',\n", |
| " 'feature_7',\n", |
| " 'feature_8',\n", |
| " 'feature_9',\n", |
| " 'feature_10',\n", |
| " 'feature_11',\n", |
| " 'feature_12',\n", |
| " 'feature_13',\n", |
| " 'feature_14',\n", |
| " 'feature_15',\n", |
| " 'feature_16',\n", |
| " 'feature_17',\n", |
| " 'feature_18',\n", |
| " 'feature_19',\n", |
| " 'feature_20',\n", |
| " 'feature_21',\n", |
| " 'feature_22',\n", |
| " 'feature_23',\n", |
| " 'feature_24',\n", |
| " 'feature_25',\n", |
| " 'feature_26',\n", |
| " 'feature_27',\n", |
| " 'feature_28',\n", |
| " 'feature_29',\n", |
| " 'feature_30',\n", |
| " 'feature_31',\n", |
| " 'feature_32',\n", |
| " 'feature_33',\n", |
| " 'feature_34',\n", |
| " 'feature_35',\n", |
| " 'feature_36',\n", |
| " 'feature_37',\n", |
| " 'feature_38',\n", |
| " 'feature_39',\n", |
| " 'feature_40',\n", |
| " 'feature_41',\n", |
| " 'feature_42',\n", |
| " 'feature_43',\n", |
| " 'feature_44',\n", |
| " 'feature_45',\n", |
| " 'feature_46',\n", |
| " 'feature_47',\n", |
| " 'feature_48',\n", |
| " 'feature_49',\n", |
| " 'feature_50',\n", |
| " 'feature_51',\n", |
| " 'feature_52',\n", |
| " 'feature_53',\n", |
| " 'feature_54',\n", |
| " 'feature_55',\n", |
| " 'feature_56',\n", |
| " 'feature_57',\n", |
| " 'feature_58',\n", |
| " 'feature_59',\n", |
| " 'feature_60',\n", |
| " 'feature_61',\n", |
| " 'feature_62',\n", |
| " 'feature_63'],\n", |
| " 'datatypes': ['float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64',\n", |
| " 'float64']}}}" |
| ] |
| }, |
| "execution_count": 4, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "dr.execute([\"saved_data\"], inputs={\"filepath\": \"data.csv\"})" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "id": "e108601ca3a88aab", |
| "metadata": {}, |
| "outputs": [], |
| "source": [] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3 (ipykernel)", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.9.13" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 5 |
| } |