{
"cells": [
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"# Use the %pip magic (not !pip) so the packages are installed into the environment of the running kernel.\n",
"%pip install pandas scikit-learn \"sf-hamilton[visualization,ui,sdk]\""
],
"id": "8480733a5c6dbbe5"
},
{
"metadata": {},
"cell_type": "code",
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/stefankrawczyk/.pyenv/versions/knowledge_retrieval-py39/lib/python3.9/site-packages/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.\n",
" warnings.warn(\n"
]
}
],
"execution_count": 1,
"source": "%load_ext hamilton.plugins.jupyter_magic",
"id": "initial_id"
},
{
"cell_type": "code",
"execution_count": 2,
"id": "efd6c1b2417bb9cf",
"metadata": {
"ExecuteTime": {
"end_time": "2024-07-22T18:12:54.548060Z",
"start_time": "2024-07-22T18:12:53.934926Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 10.0.1 (20240210.2158)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"547pt\" height=\"303pt\"\n",
" viewBox=\"0.00 0.00 546.65 302.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 298.73)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-298.73 542.65,-298.73 542.65,4 -4,4\"/>\n",
"<g id=\"clust1\" class=\"cluster\">\n",
"<title>cluster__legend</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.5,-97.72 8.5,-286.73 116.6,-286.73 116.6,-97.72 8.5,-97.72\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-269.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n",
"</g>\n",
"<!-- raw_data -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>raw_data</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M231.95,-75.52C231.95,-75.52 166.1,-75.52 166.1,-75.52 160.1,-75.52 154.1,-69.52 154.1,-63.52 154.1,-63.52 154.1,-23.92 154.1,-23.92 154.1,-17.92 160.1,-11.93 166.1,-11.93 166.1,-11.93 231.95,-11.93 231.95,-11.93 237.95,-11.93 243.95,-17.93 243.95,-23.93 243.95,-23.93 243.95,-63.53 243.95,-63.53 243.95,-69.52 237.95,-75.52 231.95,-75.52\"/>\n",
"<text text-anchor=\"start\" x=\"169.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n",
"<text text-anchor=\"start\" x=\"164.9\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- transformed_data -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>transformed_data</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M397.3,-75.52C397.3,-75.52 284.95,-75.52 284.95,-75.52 278.95,-75.52 272.95,-69.52 272.95,-63.52 272.95,-63.52 272.95,-23.92 272.95,-23.92 272.95,-17.92 278.95,-11.93 284.95,-11.93 284.95,-11.93 397.3,-11.93 397.3,-11.93 403.3,-11.93 409.3,-17.93 409.3,-23.93 409.3,-23.93 409.3,-63.53 409.3,-63.53 409.3,-69.52 403.3,-75.52 397.3,-75.52\"/>\n",
"<text text-anchor=\"start\" x=\"283.75\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n",
"<text text-anchor=\"start\" x=\"307\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- raw_data&#45;&gt;transformed_data -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>raw_data&#45;&gt;transformed_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M244.35,-43.72C249.82,-43.72 255.54,-43.72 261.33,-43.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"261.02,-47.23 271.02,-43.73 261.02,-40.23 261.02,-47.23\"/>\n",
"</g>\n",
"<!-- saved_data -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>saved_data</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-119.89 516.16,-123.45 488.47,-123.45 460.79,-123.45 438.3,-119.89 438.3,-115.5 438.3,-115.5 438.3,-43.95 438.3,-43.95 438.3,-39.56 460.79,-36 488.47,-36 516.16,-36 538.65,-39.56 538.65,-43.95 538.65,-43.95 538.65,-115.5 538.65,-115.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-111.11 516.16,-107.55 488.47,-107.55 460.79,-107.55 438.3,-111.11 438.3,-115.5\"/>\n",
"<text text-anchor=\"start\" x=\"451.72\" y=\"-88.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_data</text>\n",
"<text text-anchor=\"start\" x=\"449.1\" y=\"-60.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_data()</text>\n",
"</g>\n",
"<!-- transformed_data&#45;&gt;saved_data -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>transformed_data&#45;&gt;saved_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M409.69,-60.46C415.48,-61.89 421.3,-63.34 427.02,-64.75\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"425.84,-68.07 436.39,-67.07 427.53,-61.27 425.84,-68.07\"/>\n",
"</g>\n",
"<!-- raw_data.loader -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>raw_data.loader</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-83.89 97.06,-87.45 62.55,-87.45 28.04,-87.45 0,-83.89 0,-79.5 0,-79.5 0,-7.95 0,-7.95 0,-3.56 28.04,0 62.55,0 97.06,0 125.1,-3.56 125.1,-7.95 125.1,-7.95 125.1,-79.5 125.1,-79.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-75.11 97.06,-71.55 62.55,-71.55 28.04,-71.55 0,-75.11 0,-79.5\"/>\n",
"<text text-anchor=\"start\" x=\"10.8\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data.loader</text>\n",
"<text text-anchor=\"start\" x=\"30.3\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">raw_data()</text>\n",
"</g>\n",
"<!-- raw_data.loader&#45;&gt;raw_data -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>raw_data.loader&#45;&gt;raw_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M125.28,-43.72C131.02,-43.72 136.81,-43.72 142.49,-43.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"142.42,-47.23 152.42,-43.73 142.42,-40.23 142.42,-47.23\"/>\n",
"</g>\n",
"<!-- _saved_data_inputs -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>_saved_data_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"388.18,-138.02 294.07,-138.02 294.07,-93.42 388.18,-93.42 388.18,-138.02\"/>\n",
"<text text-anchor=\"start\" x=\"308.88\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">filepath</text>\n",
"<text text-anchor=\"start\" x=\"358.38\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _saved_data_inputs&#45;&gt;saved_data -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>_saved_data_inputs&#45;&gt;saved_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M388.51,-104.24C400.72,-101.21 414.08,-97.9 426.91,-94.73\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"427.68,-98.14 436.55,-92.34 426,-91.35 427.68,-98.14\"/>\n",
"</g>\n",
"<!-- input -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>input</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.55,-142.02 35.55,-142.02 35.55,-105.42 89.55,-105.42 89.55,-142.02\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-117.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n",
"</g>\n",
"<!-- function -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>function</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M84.97,-197.02C84.97,-197.02 40.12,-197.02 40.12,-197.02 34.12,-197.02 28.12,-191.02 28.12,-185.02 28.12,-185.02 28.12,-172.43 28.12,-172.43 28.12,-166.43 34.12,-160.43 40.12,-160.43 40.12,-160.43 84.97,-160.43 84.97,-160.43 90.97,-160.43 96.97,-166.43 96.97,-172.43 96.97,-172.43 96.97,-185.02 96.97,-185.02 96.97,-191.02 90.97,-197.02 84.97,-197.02\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-172.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n",
"</g>\n",
"<!-- materializer -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>materializer</title>\n",
"<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-254.29 87.96,-255.94 62.55,-255.94 37.14,-255.94 16.5,-254.29 16.5,-252.26 16.5,-252.26 16.5,-219.19 16.5,-219.19 16.5,-217.16 37.14,-215.51 62.55,-215.51 87.96,-215.51 108.6,-217.16 108.6,-219.19 108.6,-219.19 108.6,-252.26 108.6,-252.26\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-250.23 87.96,-248.59 62.55,-248.59 37.14,-248.59 16.5,-250.23 16.5,-252.26\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-229.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x1536c0550>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%cell_to_module simple_etl --display\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"from hamilton.function_modifiers import dataloader, datasaver\n",
"from hamilton.io import utils as io_utils\n",
"\n",
"\n",
"@dataloader()\n",
"def raw_data() -> tuple[pd.DataFrame, dict]:\n",
"    \"\"\"Load the scikit-learn digits dataset into a DataFrame.\n",
"\n",
"    Returns:\n",
"        A ``(dataframe, metadata)`` pair: the frame has one ``feature_<i>`` column per\n",
"        column of the digits data array, and the metadata is whatever\n",
"        ``io_utils.get_dataframe_metadata`` reports for that frame.\n",
"    \"\"\"\n",
"    digits = datasets.load_digits()\n",
"    n_features = digits.data.shape[1]\n",
"    feature_names = [f\"feature_{idx}\" for idx in range(n_features)]\n",
"    features = pd.DataFrame(digits.data, columns=feature_names)\n",
"    return features, io_utils.get_dataframe_metadata(features)\n",
"\n",
"\n",
"def transformed_data(raw_data: pd.DataFrame) -> pd.DataFrame:\n",
"    \"\"\"Pass-through transform step: hands back ``raw_data`` unchanged.\"\"\"\n",
"    return raw_data\n",
"\n",
"\n",
"@datasaver()\n",
"def saved_data(transformed_data: pd.DataFrame, filepath: str) -> dict:\n",
"    \"\"\"Write ``transformed_data`` to ``filepath`` as CSV.\n",
"\n",
"    Returns:\n",
"        The metadata dict from ``io_utils.get_file_and_dataframe_metadata`` for the\n",
"        written file and the saved frame.\n",
"    \"\"\"\n",
"    transformed_data.to_csv(filepath)\n",
"    return io_utils.get_file_and_dataframe_metadata(filepath, transformed_data)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e9252f2a09228330",
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-25T00:00:37.889540Z",
"start_time": "2024-06-25T00:00:35.994131Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 10.0.1 (20240210.2158)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"547pt\" height=\"303pt\"\n",
" viewBox=\"0.00 0.00 546.65 302.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 298.73)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-298.73 542.65,-298.73 542.65,4 -4,4\"/>\n",
"<g id=\"clust1\" class=\"cluster\">\n",
"<title>cluster__legend</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.5,-97.72 8.5,-286.73 116.6,-286.73 116.6,-97.72 8.5,-97.72\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-269.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n",
"</g>\n",
"<!-- raw_data -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>raw_data</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M231.95,-75.52C231.95,-75.52 166.1,-75.52 166.1,-75.52 160.1,-75.52 154.1,-69.52 154.1,-63.52 154.1,-63.52 154.1,-23.92 154.1,-23.92 154.1,-17.92 160.1,-11.93 166.1,-11.93 166.1,-11.93 231.95,-11.93 231.95,-11.93 237.95,-11.93 243.95,-17.93 243.95,-23.93 243.95,-23.93 243.95,-63.53 243.95,-63.53 243.95,-69.52 237.95,-75.52 231.95,-75.52\"/>\n",
"<text text-anchor=\"start\" x=\"169.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n",
"<text text-anchor=\"start\" x=\"164.9\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- transformed_data -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>transformed_data</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M397.3,-75.52C397.3,-75.52 284.95,-75.52 284.95,-75.52 278.95,-75.52 272.95,-69.52 272.95,-63.52 272.95,-63.52 272.95,-23.92 272.95,-23.92 272.95,-17.92 278.95,-11.93 284.95,-11.93 284.95,-11.93 397.3,-11.93 397.3,-11.93 403.3,-11.93 409.3,-17.93 409.3,-23.93 409.3,-23.93 409.3,-63.53 409.3,-63.53 409.3,-69.52 403.3,-75.52 397.3,-75.52\"/>\n",
"<text text-anchor=\"start\" x=\"283.75\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n",
"<text text-anchor=\"start\" x=\"307\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- raw_data&#45;&gt;transformed_data -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>raw_data&#45;&gt;transformed_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M244.35,-43.72C249.82,-43.72 255.54,-43.72 261.33,-43.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"261.02,-47.23 271.02,-43.73 261.02,-40.23 261.02,-47.23\"/>\n",
"</g>\n",
"<!-- saved_data -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>saved_data</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-119.89 516.16,-123.45 488.47,-123.45 460.79,-123.45 438.3,-119.89 438.3,-115.5 438.3,-115.5 438.3,-43.95 438.3,-43.95 438.3,-39.56 460.79,-36 488.47,-36 516.16,-36 538.65,-39.56 538.65,-43.95 538.65,-43.95 538.65,-115.5 538.65,-115.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M538.65,-115.5C538.65,-111.11 516.16,-107.55 488.47,-107.55 460.79,-107.55 438.3,-111.11 438.3,-115.5\"/>\n",
"<text text-anchor=\"start\" x=\"451.72\" y=\"-88.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_data</text>\n",
"<text text-anchor=\"start\" x=\"449.1\" y=\"-60.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_data()</text>\n",
"</g>\n",
"<!-- transformed_data&#45;&gt;saved_data -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>transformed_data&#45;&gt;saved_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M409.69,-60.46C415.48,-61.89 421.3,-63.34 427.02,-64.75\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"425.84,-68.07 436.39,-67.07 427.53,-61.27 425.84,-68.07\"/>\n",
"</g>\n",
"<!-- raw_data.loader -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>raw_data.loader</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-83.89 97.06,-87.45 62.55,-87.45 28.04,-87.45 0,-83.89 0,-79.5 0,-79.5 0,-7.95 0,-7.95 0,-3.56 28.04,0 62.55,0 97.06,0 125.1,-3.56 125.1,-7.95 125.1,-7.95 125.1,-79.5 125.1,-79.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M125.1,-79.5C125.1,-75.11 97.06,-71.55 62.55,-71.55 28.04,-71.55 0,-75.11 0,-79.5\"/>\n",
"<text text-anchor=\"start\" x=\"10.8\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data.loader</text>\n",
"<text text-anchor=\"start\" x=\"30.3\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">raw_data()</text>\n",
"</g>\n",
"<!-- raw_data.loader&#45;&gt;raw_data -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>raw_data.loader&#45;&gt;raw_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M125.28,-43.72C131.02,-43.72 136.81,-43.72 142.49,-43.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"142.42,-47.23 152.42,-43.73 142.42,-40.23 142.42,-47.23\"/>\n",
"</g>\n",
"<!-- _saved_data_inputs -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>_saved_data_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"388.18,-138.02 294.07,-138.02 294.07,-93.42 388.18,-93.42 388.18,-138.02\"/>\n",
"<text text-anchor=\"start\" x=\"308.88\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">filepath</text>\n",
"<text text-anchor=\"start\" x=\"358.38\" y=\"-109.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _saved_data_inputs&#45;&gt;saved_data -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>_saved_data_inputs&#45;&gt;saved_data</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M388.51,-104.24C400.72,-101.21 414.08,-97.9 426.91,-94.73\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"427.68,-98.14 436.55,-92.34 426,-91.35 427.68,-98.14\"/>\n",
"</g>\n",
"<!-- input -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>input</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.55,-142.02 35.55,-142.02 35.55,-105.42 89.55,-105.42 89.55,-142.02\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-117.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n",
"</g>\n",
"<!-- function -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>function</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M84.97,-197.02C84.97,-197.02 40.12,-197.02 40.12,-197.02 34.12,-197.02 28.12,-191.02 28.12,-185.02 28.12,-185.02 28.12,-172.43 28.12,-172.43 28.12,-166.43 34.12,-160.43 40.12,-160.43 40.12,-160.43 84.97,-160.43 84.97,-160.43 90.97,-160.43 96.97,-166.43 96.97,-172.43 96.97,-172.43 96.97,-185.02 96.97,-185.02 96.97,-191.02 90.97,-197.02 84.97,-197.02\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-172.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n",
"</g>\n",
"<!-- materializer -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>materializer</title>\n",
"<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-254.29 87.96,-255.94 62.55,-255.94 37.14,-255.94 16.5,-254.29 16.5,-252.26 16.5,-252.26 16.5,-219.19 16.5,-219.19 16.5,-217.16 37.14,-215.51 62.55,-215.51 87.96,-215.51 108.6,-217.16 108.6,-219.19 108.6,-219.19 108.6,-252.26 108.6,-252.26\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M108.6,-252.26C108.6,-250.23 87.96,-248.59 62.55,-248.59 37.14,-248.59 16.5,-250.23 16.5,-252.26\"/>\n",
"<text text-anchor=\"middle\" x=\"62.55\" y=\"-229.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x153c29040>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from hamilton import driver\n",
"from hamilton_sdk import adapters\n",
"\n",
"# Tracking adapter attached to the driver below; adjust the project/user values for your own setup.\n",
"tracker = adapters.HamiltonTracker(\n",
"    project_id=7,  # modify this as needed\n",
"    username=\"elijah@dagworks.io\",  # modify this as needed\n",
"    dag_name=\"my_version_of_the_dag\",\n",
"    tags={\"environment\": \"DEV\", \"team\": \"MY_TEAM\", \"version\": \"X\"},\n",
")\n",
"\n",
"# Build the driver over the simple_etl module, one builder step per line.\n",
"dr = (\n",
"    driver.Builder()\n",
"    .with_config({})\n",
"    .with_modules(simple_etl)\n",
"    .with_adapters(tracker)\n",
"    .build()\n",
")\n",
"\n",
"# Last expression of the cell, so the returned graph is rendered as the cell output.\n",
"dr.display_all_functions()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "86c0d0f7da9a472b",
"metadata": {
"ExecuteTime": {
"end_time": "2024-06-25T00:00:53.746596Z",
"start_time": "2024-06-25T00:00:52.320439Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Capturing execution run. Results can be found at http://localhost:8241/dashboard/project/7/runs/84\n",
"\n",
"\n",
"Captured execution run. Results can be found at http://localhost:8241/dashboard/project/7/runs/84\n",
"\n"
]
},
{
"data": {
"text/plain": [
"{'saved_data': {'file_metadata': {'size': 499704,\n",
" 'path': 'data.csv',\n",
" 'last_modified': 1721672006.6388159,\n",
" 'timestamp': 1721697206.638992,\n",
" 'scheme': '',\n",
" 'notes': ''},\n",
" 'dataframe_metadata': {'rows': 1797,\n",
" 'columns': 64,\n",
" 'column_names': ['feature_0',\n",
" 'feature_1',\n",
" 'feature_2',\n",
" 'feature_3',\n",
" 'feature_4',\n",
" 'feature_5',\n",
" 'feature_6',\n",
" 'feature_7',\n",
" 'feature_8',\n",
" 'feature_9',\n",
" 'feature_10',\n",
" 'feature_11',\n",
" 'feature_12',\n",
" 'feature_13',\n",
" 'feature_14',\n",
" 'feature_15',\n",
" 'feature_16',\n",
" 'feature_17',\n",
" 'feature_18',\n",
" 'feature_19',\n",
" 'feature_20',\n",
" 'feature_21',\n",
" 'feature_22',\n",
" 'feature_23',\n",
" 'feature_24',\n",
" 'feature_25',\n",
" 'feature_26',\n",
" 'feature_27',\n",
" 'feature_28',\n",
" 'feature_29',\n",
" 'feature_30',\n",
" 'feature_31',\n",
" 'feature_32',\n",
" 'feature_33',\n",
" 'feature_34',\n",
" 'feature_35',\n",
" 'feature_36',\n",
" 'feature_37',\n",
" 'feature_38',\n",
" 'feature_39',\n",
" 'feature_40',\n",
" 'feature_41',\n",
" 'feature_42',\n",
" 'feature_43',\n",
" 'feature_44',\n",
" 'feature_45',\n",
" 'feature_46',\n",
" 'feature_47',\n",
" 'feature_48',\n",
" 'feature_49',\n",
" 'feature_50',\n",
" 'feature_51',\n",
" 'feature_52',\n",
" 'feature_53',\n",
" 'feature_54',\n",
" 'feature_55',\n",
" 'feature_56',\n",
" 'feature_57',\n",
" 'feature_58',\n",
" 'feature_59',\n",
" 'feature_60',\n",
" 'feature_61',\n",
" 'feature_62',\n",
" 'feature_63'],\n",
" 'datatypes': ['float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64',\n",
" 'float64']}}}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Request the saved_data node and supply its filepath input; the returned dict is shown as the cell output.\n",
"dr.execute(\n",
"    final_vars=[\"saved_data\"],\n",
"    inputs={\"filepath\": \"data.csv\"},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e108601ca3a88aab",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}