{
"cells": [
{
"cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {},
"source": [
"Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied. See the License for the\nspecific language governing permissions and limitations\nunder the License."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b12abc0bf96a1fa",
"metadata": {},
"outputs": [],
"source": [
"%pip install sf-hamilton[visualization]"
]
},
{
"cell_type": "markdown",
"id": "5fdf2bac7ddc6f79",
"metadata": {},
"source": [
"# Modular Pipeline Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/apache/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb)\n",
"This notebook uses Hamilton's Jupyter magic commands to build a simple example of how to reuse pipelines in a modular manner with `subdag`."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "initial_id",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T06:57:19.359572Z",
"start_time": "2024-12-07T06:57:13.119759Z"
},
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/stefankrawczyk/.pyenv/versions/knowledge_retrieval-py39/lib/python3.9/site-packages/pyspark/pandas/__init__.py:50: UserWarning: 'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.\n",
" warnings.warn(\n"
]
}
],
"source": [
"%load_ext hamilton.plugins.jupyter_magic"
]
},
{
"cell_type": "markdown",
"id": "29ebd0ec7fc5b800",
"metadata": {},
"source": [
"# Define features module\n",
"\n",
"This is the common data preprocessing step."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7fafbffaf2f6f68a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T06:57:19.627950Z",
"start_time": "2024-12-07T06:57:19.368576Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"381pt\" height=\"211pt\"\n viewBox=\"0.00 0.00 381.05 210.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 206.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-206.8 377.05,-206.8 377.05,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8,-64.8 8,-194.8 92.85,-194.8 92.85,-64.8 8,-64.8\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-177.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- transformed_data -->\n<g id=\"node1\" class=\"node\">\n<title>transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M361.05,-63.6C361.05,-63.6 248.7,-63.6 248.7,-63.6 242.7,-63.6 236.7,-57.6 236.7,-51.6 236.7,-51.6 236.7,-12 236.7,-12 236.7,-6 242.7,0 248.7,0 248.7,0 361.05,0 361.05,0 367.05,0 373.05,-6 373.05,-12 373.05,-12 373.05,-51.6 373.05,-51.6 373.05,-57.6 367.05,-63.6 361.05,-63.6\"/>\n<text text-anchor=\"start\" x=\"247.5\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"270.75\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- raw_data -->\n<g id=\"node2\" class=\"node\">\n<title>raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M195.7,-63.6C195.7,-63.6 129.85,-63.6 129.85,-63.6 123.85,-63.6 117.85,-57.6 117.85,-51.6 117.85,-51.6 117.85,-12 117.85,-12 117.85,-6 123.85,0 129.85,0 129.85,0 195.7,0 195.7,0 201.7,0 207.7,-6 207.7,-12 207.7,-12 
207.7,-51.6 207.7,-51.6 207.7,-57.6 201.7,-63.6 195.7,-63.6\"/>\n<text text-anchor=\"start\" x=\"133.15\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n<text text-anchor=\"start\" x=\"128.65\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- raw_data&#45;&gt;transformed_data -->\n<g id=\"edge1\" class=\"edge\">\n<title>raw_data&#45;&gt;transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M208.1,-31.8C213.57,-31.8 219.29,-31.8 225.08,-31.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"224.77,-35.3 234.77,-31.8 224.77,-28.3 224.77,-35.3\"/>\n</g>\n<!-- _raw_data_inputs -->\n<g id=\"node3\" class=\"node\">\n<title>_raw_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"88.85,-54.1 12,-54.1 12,-9.5 88.85,-9.5 88.85,-54.1\"/>\n<text text-anchor=\"start\" x=\"26.8\" y=\"-26\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">path</text>\n<text text-anchor=\"start\" x=\"59.05\" y=\"-26\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _raw_data_inputs&#45;&gt;raw_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>_raw_data_inputs&#45;&gt;raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M89.09,-31.8C94.65,-31.8 100.47,-31.8 106.29,-31.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"105.91,-35.3 115.91,-31.8 105.91,-28.3 105.91,-35.3\"/>\n</g>\n<!-- input -->\n<g id=\"node4\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"77.43,-109.1 23.43,-109.1 23.43,-72.5 77.43,-72.5 77.43,-109.1\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-85\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node5\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" 
d=\"M72.85,-164.1C72.85,-164.1 28,-164.1 28,-164.1 22,-164.1 16,-158.1 16,-152.1 16,-152.1 16,-139.5 16,-139.5 16,-133.5 22,-127.5 28,-127.5 28,-127.5 72.85,-127.5 72.85,-127.5 78.85,-127.5 84.85,-133.5 84.85,-139.5 84.85,-139.5 84.85,-152.1 84.85,-152.1 84.85,-158.1 78.85,-164.1 72.85,-164.1\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-140\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14d580e20>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%cell_to_module features --display\n",
"\n",
"import pandas as pd\n",
"\n",
"def raw_data(path: str) -> pd.DataFrame:\n",
" return pd.read_csv(path)\n",
"\n",
"def transformed_data(raw_data: pd.DataFrame) -> pd.DataFrame:\n",
" return raw_data.dropna()"
]
},
{
"cell_type": "markdown",
"id": "ee170ce894848eae",
"metadata": {},
"source": [
"# Define train module\n",
"\n",
"This is the training bit of the dataflow."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "eae523c3fba37c93",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T06:57:19.971271Z",
"start_time": "2024-12-07T06:57:19.724804Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"508pt\" height=\"322pt\"\n viewBox=\"0.00 0.00 508.30 322.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 318)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-318 504.3,-318 504.3,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"31.25,-122 31.25,-306 116.1,-306 116.1,-122 31.25,-122\"/>\n<text text-anchor=\"middle\" x=\"73.68\" y=\"-288.7\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- model -->\n<g id=\"node1\" class=\"node\">\n<title>model</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"120.68,-50 20.68,-50 20.68,0 126.68,0 126.68,-44 120.68,-50\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"120.68,-50 120.68,-44\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"126.68,-44 120.68,-44\"/>\n<text text-anchor=\"start\" x=\"53.43\" y=\"-33.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">model</text>\n<text text-anchor=\"start\" x=\"28.68\" y=\"-5.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">RandomForest</text>\n</g>\n<!-- base_model -->\n<g id=\"node2\" class=\"node\">\n<title>base_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M344.07,-121.8C344.07,-121.8 218.97,-121.8 218.97,-121.8 212.97,-121.8 206.97,-115.8 206.97,-109.8 206.97,-109.8 206.97,-70.2 206.97,-70.2 206.97,-64.2 212.97,-58.2 218.97,-58.2 218.97,-58.2 344.07,-58.2 344.07,-58.2 350.07,-58.2 356.07,-64.2 356.07,-70.2 356.07,-70.2 356.07,-109.8 356.07,-109.8 356.07,-115.8 
350.07,-121.8 344.07,-121.8\"/>\n<text text-anchor=\"start\" x=\"217.77\" y=\"-98.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">base_model: model</text>\n<text text-anchor=\"start\" x=\"250.02\" y=\"-70.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- fit_model -->\n<g id=\"node3\" class=\"node\">\n<title>fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M488.3,-157.8C488.3,-157.8 427.7,-157.8 427.7,-157.8 421.7,-157.8 415.7,-151.8 415.7,-145.8 415.7,-145.8 415.7,-106.2 415.7,-106.2 415.7,-100.2 421.7,-94.2 427.7,-94.2 427.7,-94.2 488.3,-94.2 488.3,-94.2 494.3,-94.2 500.3,-100.2 500.3,-106.2 500.3,-106.2 500.3,-145.8 500.3,-145.8 500.3,-151.8 494.3,-157.8 488.3,-157.8\"/>\n<text text-anchor=\"start\" x=\"427.62\" y=\"-134.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n<text text-anchor=\"start\" x=\"426.5\" y=\"-106.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- base_model&#45;&gt;fit_model -->\n<g id=\"edge2\" class=\"edge\">\n<title>base_model&#45;&gt;fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.52,-105.27C372.65,-108.6 389.41,-112.05 404.5,-115.17\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"403.7,-118.58 414.2,-117.17 405.11,-111.72 403.7,-118.58\"/>\n</g>\n<!-- _base_model_inputs -->\n<g id=\"node4\" class=\"node\">\n<title>_base_model_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"147.35,-112.3 0,-112.3 0,-67.7 147.35,-67.7 147.35,-112.3\"/>\n<text text-anchor=\"start\" x=\"14.8\" y=\"-84.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">model_params</text>\n<text text-anchor=\"start\" x=\"111.55\" y=\"-84.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">dict</text>\n</g>\n<!-- _base_model_inputs&#45;&gt;base_model -->\n<g 
id=\"edge1\" class=\"edge\">\n<title>_base_model_inputs&#45;&gt;base_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M147.83,-90C163.17,-90 179.48,-90 195.22,-90\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"195.05,-93.5 205.05,-90 195.05,-86.5 195.05,-93.5\"/>\n</g>\n<!-- _fit_model_inputs -->\n<g id=\"node5\" class=\"node\">\n<title>_fit_model_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"386.7,-184.3 176.35,-184.3 176.35,-139.7 386.7,-139.7 386.7,-184.3\"/>\n<text text-anchor=\"start\" x=\"191.15\" y=\"-156.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"303.65\" y=\"-156.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- _fit_model_inputs&#45;&gt;fit_model -->\n<g id=\"edge3\" class=\"edge\">\n<title>_fit_model_inputs&#45;&gt;fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M387.04,-140.44C392.99,-139.21 398.82,-138 404.43,-136.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"405.01,-140.3 414.1,-134.85 403.6,-133.45 405.01,-140.3\"/>\n</g>\n<!-- config -->\n<g id=\"node6\" class=\"node\">\n<title>config</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"94.68,-166 46.68,-166 46.68,-130 100.68,-130 100.68,-160 94.68,-166\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"94.68,-166 94.68,-160\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"100.68,-160 94.68,-160\"/>\n<text text-anchor=\"middle\" x=\"73.68\" y=\"-142.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">config</text>\n</g>\n<!-- input -->\n<g id=\"node7\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"100.68,-220.3 46.68,-220.3 46.68,-183.7 100.68,-183.7 100.68,-220.3\"/>\n<text text-anchor=\"middle\" x=\"73.68\" y=\"-196.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- 
function -->\n<g id=\"node8\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M96.1,-275.3C96.1,-275.3 51.25,-275.3 51.25,-275.3 45.25,-275.3 39.25,-269.3 39.25,-263.3 39.25,-263.3 39.25,-250.7 39.25,-250.7 39.25,-244.7 45.25,-238.7 51.25,-238.7 51.25,-238.7 96.1,-238.7 96.1,-238.7 102.1,-238.7 108.1,-244.7 108.1,-250.7 108.1,-250.7 108.1,-263.3 108.1,-263.3 108.1,-269.3 102.1,-275.3 96.1,-275.3\"/>\n<text text-anchor=\"middle\" x=\"73.68\" y=\"-251.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14d5a8190>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%cell_to_module train --config '{\"model\":\"RandomForest\"}' --display\n",
"\n",
"from typing import Any\n",
"import pandas as pd\n",
"\n",
"from hamilton.function_modifiers import config\n",
"\n",
"@config.when(model=\"RandomForest\")\n",
"def base_model__rf(model_params: dict) -> Any:\n",
" from sklearn.ensemble import RandomForestClassifier\n",
" return RandomForestClassifier(**model_params)\n",
"\n",
"@config.when(model=\"LogisticRegression\")\n",
"def base_model__lr(model_params: dict) -> Any:\n",
" from sklearn.linear_model import LogisticRegression\n",
" return LogisticRegression(**model_params)\n",
"\n",
"@config.when(model=\"XGBoost\")\n",
"def base_model__xgb(model_params: dict) -> Any:\n",
" from xgboost import XGBClassifier\n",
" return XGBClassifier(**model_params)\n",
"\n",
"\n",
"def fit_model(transformed_data: pd.DataFrame, base_model: Any) -> Any:\n",
" \"\"\"Fit a model to transformed data.\"\"\"\n",
" base_model.fit(transformed_data.drop(\"target\", axis=1), transformed_data[\"target\"])\n",
" return base_model\n"
]
},
{
"cell_type": "markdown",
"id": "8cae5e1a9c682ea5",
"metadata": {},
"source": [
"# Define the inference module\n",
"\n",
"This houses what we need for inference."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2ad9e61062f6516a",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T06:57:20.363768Z",
"start_time": "2024-12-07T06:57:20.114344Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"364pt\" height=\"222pt\"\n viewBox=\"0.00 0.00 364.20 221.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 217.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-217.8 360.2,-217.8 360.2,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"62.38,-75.8 62.38,-205.8 147.23,-205.8 147.23,-75.8 62.38,-75.8\"/>\n<text text-anchor=\"middle\" x=\"104.8\" y=\"-188.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- predicted_data -->\n<g id=\"node1\" class=\"node\">\n<title>predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M344.2,-64.6C344.2,-64.6 250.6,-64.6 250.6,-64.6 244.6,-64.6 238.6,-58.6 238.6,-52.6 238.6,-52.6 238.6,-13 238.6,-13 238.6,-7 244.6,-1 250.6,-1 250.6,-1 344.2,-1 344.2,-1 350.2,-1 356.2,-7 356.2,-13 356.2,-13 356.2,-52.6 356.2,-52.6 356.2,-58.6 350.2,-64.6 344.2,-64.6\"/>\n<text text-anchor=\"start\" x=\"249.4\" y=\"-41.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data</text>\n<text text-anchor=\"start\" x=\"263.28\" y=\"-13.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- _predicted_data_inputs -->\n<g id=\"node2\" class=\"node\">\n<title>_predicted_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"209.6,-65.6 0,-65.6 0,0 209.6,0 209.6,-65.6\"/>\n<text text-anchor=\"start\" x=\"14.55\" y=\"-37.5\" font-family=\"Helvetica,sans-Serif\" 
font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"126.68\" y=\"-37.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">DataFrame</text>\n<text text-anchor=\"start\" x=\"40.05\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">fit_model</text>\n<text text-anchor=\"start\" x=\"129.3\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- _predicted_data_inputs&#45;&gt;predicted_data -->\n<g id=\"edge1\" class=\"edge\">\n<title>_predicted_data_inputs&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M210.02,-32.8C215.8,-32.8 221.54,-32.8 227.14,-32.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"226.92,-36.3 236.92,-32.8 226.92,-29.3 226.92,-36.3\"/>\n</g>\n<!-- input -->\n<g id=\"node3\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"131.8,-120.1 77.8,-120.1 77.8,-83.5 131.8,-83.5 131.8,-120.1\"/>\n<text text-anchor=\"middle\" x=\"104.8\" y=\"-96\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node4\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M127.23,-175.1C127.23,-175.1 82.38,-175.1 82.38,-175.1 76.38,-175.1 70.38,-169.1 70.38,-163.1 70.38,-163.1 70.38,-150.5 70.38,-150.5 70.38,-144.5 76.38,-138.5 82.38,-138.5 82.38,-138.5 127.23,-138.5 127.23,-138.5 133.23,-138.5 139.23,-144.5 139.23,-150.5 139.23,-150.5 139.23,-163.1 139.23,-163.1 139.23,-169.1 133.23,-175.1 127.23,-175.1\"/>\n<text text-anchor=\"middle\" x=\"104.8\" y=\"-151\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14d647be0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%cell_to_module inference --display\n",
"from typing import Any\n",
"import pandas as pd\n",
"\n",
"\n",
"def predicted_data(transformed_data: pd.DataFrame, fit_model: Any) -> pd.DataFrame:\n",
" return fit_model.predict(transformed_data)\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "3a1a0d9aca3944b1",
"metadata": {},
"source": [
"# We can combine the modules independently with different drivers\n",
"\n",
"However, this won't give us a single dataflow (DAG)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9ac29701bdd31fb5",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T18:08:40.538779Z",
"start_time": "2024-12-07T18:08:39.642181Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"762pt\" height=\"390pt\"\n viewBox=\"0.00 0.00 761.65 390.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 386)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-386 757.65,-386 757.65,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"27.08,-190 27.08,-374 111.92,-374 111.92,-190 27.08,-190\"/>\n<text text-anchor=\"middle\" x=\"69.5\" y=\"-356.7\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- model -->\n<g id=\"node1\" class=\"node\">\n<title>model</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"116.5,-50 16.5,-50 16.5,0 122.5,0 122.5,-44 116.5,-50\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"116.5,-50 116.5,-44\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"122.5,-44 116.5,-44\"/>\n<text text-anchor=\"start\" x=\"49.25\" y=\"-33.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">model</text>\n<text text-anchor=\"start\" x=\"24.5\" y=\"-5.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">RandomForest</text>\n</g>\n<!-- model_params -->\n<g id=\"node2\" class=\"node\">\n<title>model_params</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"133,-118 0,-118 0,-68 139,-68 139,-112 133,-118\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"133,-118 133,-112\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"139,-112 133,-112\"/>\n<text text-anchor=\"start\" x=\"21.12\" y=\"-101.7\" font-family=\"Helvetica,sans-Serif\" 
font-weight=\"bold\" font-size=\"14.00\">model_params</text>\n<text text-anchor=\"start\" x=\"8\" y=\"-73.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">{&#39;n_estimators&#39;: 100}</text>\n</g>\n<!-- predicted_data -->\n<g id=\"node3\" class=\"node\">\n<title>predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M741.65,-219.8C741.65,-219.8 648.05,-219.8 648.05,-219.8 642.05,-219.8 636.05,-213.8 636.05,-207.8 636.05,-207.8 636.05,-168.2 636.05,-168.2 636.05,-162.2 642.05,-156.2 648.05,-156.2 648.05,-156.2 741.65,-156.2 741.65,-156.2 747.65,-156.2 753.65,-162.2 753.65,-168.2 753.65,-168.2 753.65,-207.8 753.65,-207.8 753.65,-213.8 747.65,-219.8 741.65,-219.8\"/>\n<text text-anchor=\"start\" x=\"646.85\" y=\"-196.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data</text>\n<text text-anchor=\"start\" x=\"660.73\" y=\"-168.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- base_model -->\n<g id=\"node4\" class=\"node\">\n<title>base_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M481.45,-271.8C481.45,-271.8 356.35,-271.8 356.35,-271.8 350.35,-271.8 344.35,-265.8 344.35,-259.8 344.35,-259.8 344.35,-220.2 344.35,-220.2 344.35,-214.2 350.35,-208.2 356.35,-208.2 356.35,-208.2 481.45,-208.2 481.45,-208.2 487.45,-208.2 493.45,-214.2 493.45,-220.2 493.45,-220.2 493.45,-259.8 493.45,-259.8 493.45,-265.8 487.45,-271.8 481.45,-271.8\"/>\n<text text-anchor=\"start\" x=\"355.15\" y=\"-248.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">base_model: model</text>\n<text text-anchor=\"start\" x=\"387.4\" y=\"-220.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- fit_model -->\n<g id=\"node6\" class=\"node\">\n<title>fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M595.05,-249.8C595.05,-249.8 534.45,-249.8 
534.45,-249.8 528.45,-249.8 522.45,-243.8 522.45,-237.8 522.45,-237.8 522.45,-198.2 522.45,-198.2 522.45,-192.2 528.45,-186.2 534.45,-186.2 534.45,-186.2 595.05,-186.2 595.05,-186.2 601.05,-186.2 607.05,-192.2 607.05,-198.2 607.05,-198.2 607.05,-237.8 607.05,-237.8 607.05,-243.8 601.05,-249.8 595.05,-249.8\"/>\n<text text-anchor=\"start\" x=\"534.38\" y=\"-226.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n<text text-anchor=\"start\" x=\"533.25\" y=\"-198.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- base_model&#45;&gt;fit_model -->\n<g id=\"edge6\" class=\"edge\">\n<title>base_model&#45;&gt;fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M493.93,-228.68C499.65,-227.8 505.36,-226.93 510.9,-226.08\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"511.15,-229.59 520.5,-224.61 510.09,-222.67 511.15,-229.59\"/>\n</g>\n<!-- transformed_data -->\n<g id=\"node5\" class=\"node\">\n<title>transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M475.07,-189.8C475.07,-189.8 362.72,-189.8 362.72,-189.8 356.72,-189.8 350.72,-183.8 350.72,-177.8 350.72,-177.8 350.72,-138.2 350.72,-138.2 350.72,-132.2 356.72,-126.2 362.72,-126.2 362.72,-126.2 475.07,-126.2 475.07,-126.2 481.07,-126.2 487.07,-132.2 487.07,-138.2 487.07,-138.2 487.07,-177.8 487.07,-177.8 487.07,-183.8 481.07,-189.8 475.07,-189.8\"/>\n<text text-anchor=\"start\" x=\"361.52\" y=\"-166.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"384.77\" y=\"-138.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- transformed_data&#45;&gt;predicted_data -->\n<g id=\"edge1\" class=\"edge\">\n<title>transformed_data&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M487.51,-164.45C523.1,-167.96 567.46,-172.48 
607.05,-177 612.75,-177.65 618.66,-178.35 624.59,-179.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"624.12,-182.53 634.48,-180.28 624.98,-175.59 624.12,-182.53\"/>\n</g>\n<!-- transformed_data&#45;&gt;fit_model -->\n<g id=\"edge5\" class=\"edge\">\n<title>transformed_data&#45;&gt;fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M487.19,-186.07C495.44,-189.51 503.76,-192.98 511.73,-196.3\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"510.37,-199.53 520.95,-200.15 513.07,-193.07 510.37,-199.53\"/>\n</g>\n<!-- fit_model&#45;&gt;predicted_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>fit_model&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M607.34,-208.26C613,-206.93 618.93,-205.55 624.92,-204.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"625.3,-207.65 634.24,-201.96 623.7,-200.83 625.3,-207.65\"/>\n</g>\n<!-- raw_data -->\n<g id=\"node7\" class=\"node\">\n<title>raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M274.6,-189.8C274.6,-189.8 208.75,-189.8 208.75,-189.8 202.75,-189.8 196.75,-183.8 196.75,-177.8 196.75,-177.8 196.75,-138.2 196.75,-138.2 196.75,-132.2 202.75,-126.2 208.75,-126.2 208.75,-126.2 274.6,-126.2 274.6,-126.2 280.6,-126.2 286.6,-132.2 286.6,-138.2 286.6,-138.2 286.6,-177.8 286.6,-177.8 286.6,-183.8 280.6,-189.8 274.6,-189.8\"/>\n<text text-anchor=\"start\" x=\"212.05\" y=\"-166.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n<text text-anchor=\"start\" x=\"207.55\" y=\"-138.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- raw_data&#45;&gt;transformed_data -->\n<g id=\"edge4\" class=\"edge\">\n<title>raw_data&#45;&gt;transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M286.89,-158C302.77,-158 321.19,-158 339.11,-158\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"338.97,-161.5 348.97,-158 338.97,-154.5 338.97,-161.5\"/>\n</g>\n<!-- 
_base_model_inputs -->\n<g id=\"node8\" class=\"node\">\n<title>_base_model_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"315.35,-262.3 168,-262.3 168,-217.7 315.35,-217.7 315.35,-262.3\"/>\n<text text-anchor=\"start\" x=\"182.8\" y=\"-234.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">model_params</text>\n<text text-anchor=\"start\" x=\"279.55\" y=\"-234.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">dict</text>\n</g>\n<!-- _base_model_inputs&#45;&gt;base_model -->\n<g id=\"edge3\" class=\"edge\">\n<title>_base_model_inputs&#45;&gt;base_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M315.72,-240C321.44,-240 327.25,-240 333.04,-240\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"332.62,-243.5 342.62,-240 332.62,-236.5 332.62,-243.5\"/>\n</g>\n<!-- _raw_data_inputs -->\n<g id=\"node9\" class=\"node\">\n<title>_raw_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"107.92,-180.3 31.08,-180.3 31.08,-135.7 107.92,-135.7 107.92,-180.3\"/>\n<text text-anchor=\"start\" x=\"45.88\" y=\"-152.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">path</text>\n<text text-anchor=\"start\" x=\"78.12\" y=\"-152.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _raw_data_inputs&#45;&gt;raw_data -->\n<g id=\"edge7\" class=\"edge\">\n<title>_raw_data_inputs&#45;&gt;raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M108.27,-158C130.85,-158 159.97,-158 185.14,-158\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"184.87,-161.5 194.87,-158 184.87,-154.5 184.87,-161.5\"/>\n</g>\n<!-- config -->\n<g id=\"node10\" class=\"node\">\n<title>config</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"90.5,-234 42.5,-234 42.5,-198 96.5,-198 96.5,-228 90.5,-234\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"90.5,-234 90.5,-228\"/>\n<polyline fill=\"none\" stroke=\"black\" 
points=\"96.5,-228 90.5,-228\"/>\n<text text-anchor=\"middle\" x=\"69.5\" y=\"-210.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">config</text>\n</g>\n<!-- input -->\n<g id=\"node11\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"96.5,-288.3 42.5,-288.3 42.5,-251.7 96.5,-251.7 96.5,-288.3\"/>\n<text text-anchor=\"middle\" x=\"69.5\" y=\"-264.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node12\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M91.92,-343.3C91.92,-343.3 47.08,-343.3 47.08,-343.3 41.08,-343.3 35.08,-337.3 35.08,-331.3 35.08,-331.3 35.08,-318.7 35.08,-318.7 35.08,-312.7 41.08,-306.7 47.08,-306.7 47.08,-306.7 91.92,-306.7 91.92,-306.7 97.92,-306.7 103.92,-312.7 103.92,-318.7 103.92,-318.7 103.92,-331.3 103.92,-331.3 103.92,-337.3 97.92,-343.3 91.92,-343.3\"/>\n<text text-anchor=\"middle\" x=\"69.5\" y=\"-319.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14da8b880>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Training driver: the config carries the model choice and its hyper-parameters.\n",
"from hamilton import driver\n",
"\n",
"train_dr = (\n",
"    driver.Builder()\n",
"    .with_config(\n",
"        {\n",
"            \"model\": \"RandomForest\",\n",
"            \"model_params\": {\"n_estimators\": 100},\n",
"        }\n",
"    )\n",
"    .with_modules(features, train, inference)\n",
"    .build()\n",
")\n",
"# Last expression of the cell: renders the graph of every function in the driver.\n",
"train_dr.display_all_functions()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cc9401ed081df22f",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T18:09:13.265102Z",
"start_time": "2024-12-07T18:09:12.750662Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"545pt\" height=\"273pt\"\n viewBox=\"0.00 0.00 545.40 273.30\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 269.3)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-269.3 541.4,-269.3 541.4,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8,-127.3 8,-257.3 92.85,-257.3 92.85,-127.3 8,-127.3\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-240\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- predicted_data -->\n<g id=\"node1\" class=\"node\">\n<title>predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M525.4,-90.1C525.4,-90.1 431.8,-90.1 431.8,-90.1 425.8,-90.1 419.8,-84.1 419.8,-78.1 419.8,-78.1 419.8,-38.5 419.8,-38.5 419.8,-32.5 425.8,-26.5 431.8,-26.5 431.8,-26.5 525.4,-26.5 525.4,-26.5 531.4,-26.5 537.4,-32.5 537.4,-38.5 537.4,-38.5 537.4,-78.1 537.4,-78.1 537.4,-84.1 531.4,-90.1 525.4,-90.1\"/>\n<text text-anchor=\"start\" x=\"430.6\" y=\"-67\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data</text>\n<text text-anchor=\"start\" x=\"444.47\" y=\"-39\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- transformed_data -->\n<g id=\"node2\" class=\"node\">\n<title>transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M369.92,-126.1C369.92,-126.1 257.57,-126.1 257.57,-126.1 251.57,-126.1 245.57,-120.1 245.57,-114.1 245.57,-114.1 245.57,-74.5 245.57,-74.5 245.57,-68.5 251.57,-62.5 257.57,-62.5 257.57,-62.5 
369.92,-62.5 369.92,-62.5 375.92,-62.5 381.92,-68.5 381.92,-74.5 381.92,-74.5 381.92,-114.1 381.92,-114.1 381.92,-120.1 375.92,-126.1 369.92,-126.1\"/>\n<text text-anchor=\"start\" x=\"256.37\" y=\"-103\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_data</text>\n<text text-anchor=\"start\" x=\"279.62\" y=\"-75\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- transformed_data&#45;&gt;predicted_data -->\n<g id=\"edge1\" class=\"edge\">\n<title>transformed_data&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M382.4,-79.35C390.97,-77.45 399.74,-75.51 408.34,-73.61\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"409,-77.05 418.01,-71.47 407.49,-70.21 409,-77.05\"/>\n</g>\n<!-- raw_data -->\n<g id=\"node3\" class=\"node\">\n<title>raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M195.7,-126.1C195.7,-126.1 129.85,-126.1 129.85,-126.1 123.85,-126.1 117.85,-120.1 117.85,-114.1 117.85,-114.1 117.85,-74.5 117.85,-74.5 117.85,-68.5 123.85,-62.5 129.85,-62.5 129.85,-62.5 195.7,-62.5 195.7,-62.5 201.7,-62.5 207.7,-68.5 207.7,-74.5 207.7,-74.5 207.7,-114.1 207.7,-114.1 207.7,-120.1 201.7,-126.1 195.7,-126.1\"/>\n<text text-anchor=\"start\" x=\"133.15\" y=\"-103\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">raw_data</text>\n<text text-anchor=\"start\" x=\"128.65\" y=\"-75\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- raw_data&#45;&gt;transformed_data -->\n<g id=\"edge3\" class=\"edge\">\n<title>raw_data&#45;&gt;transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M208.07,-94.3C216.27,-94.3 225.06,-94.3 233.93,-94.3\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"233.66,-97.8 243.66,-94.3 233.66,-90.8 233.66,-97.8\"/>\n</g>\n<!-- _predicted_data_inputs -->\n<g id=\"node4\" 
class=\"node\">\n<title>_predicted_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"390.8,-44.6 236.7,-44.6 236.7,0 390.8,0 390.8,-44.6\"/>\n<text text-anchor=\"start\" x=\"251.5\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">fit_model</text>\n<text text-anchor=\"start\" x=\"313\" y=\"-16.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- _predicted_data_inputs&#45;&gt;predicted_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>_predicted_data_inputs&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M390.93,-39.14C396.78,-40.43 402.67,-41.74 408.47,-43.02\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"407.5,-46.39 418.02,-45.13 409.01,-39.55 407.5,-46.39\"/>\n</g>\n<!-- _raw_data_inputs -->\n<g id=\"node5\" class=\"node\">\n<title>_raw_data_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"88.85,-116.6 12,-116.6 12,-72 88.85,-72 88.85,-116.6\"/>\n<text text-anchor=\"start\" x=\"26.8\" y=\"-88.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">path</text>\n<text text-anchor=\"start\" x=\"59.05\" y=\"-88.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _raw_data_inputs&#45;&gt;raw_data -->\n<g id=\"edge4\" class=\"edge\">\n<title>_raw_data_inputs&#45;&gt;raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M89.09,-94.3C94.65,-94.3 100.47,-94.3 106.29,-94.3\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"105.91,-97.8 115.91,-94.3 105.91,-90.8 105.91,-97.8\"/>\n</g>\n<!-- input -->\n<g id=\"node6\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"77.42,-171.6 23.42,-171.6 23.42,-135 77.42,-135 77.42,-171.6\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-147.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g 
id=\"node7\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M72.85,-226.6C72.85,-226.6 28,-226.6 28,-226.6 22,-226.6 16,-220.6 16,-214.6 16,-214.6 16,-202 16,-202 16,-196 22,-190 28,-190 28,-190 72.85,-190 72.85,-190 78.85,-190 84.85,-196 84.85,-202 84.85,-202 84.85,-214.6 84.85,-214.6 84.85,-220.6 78.85,-226.6 72.85,-226.6\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-202.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14dac0820>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inference-only driver: empty config, built from the feature and inference modules only.\n",
"from hamilton import driver\n",
"\n",
"inference_dr = (\n",
"    driver.Builder()\n",
"    .with_config({})\n",
"    .with_modules(features, inference)\n",
"    .build()\n",
")\n",
"# Last expression of the cell: renders the graph of every function in the driver.\n",
"inference_dr.display_all_functions()"
]
},
{
"cell_type": "markdown",
"id": "d85c51388733ce96",
"metadata": {},
"source": [
"# To combine into a single dataflow we can use @subdag\n",
"\n",
"Suppose we want a single pipeline that enables us to:\n",
"\n",
"1. train the model and get predictions on the training set.\n",
"2. use the fitted model to predict on a separate dataset.\n",
"\n",
"To do that, we define another module whose functions are decorated with `@subdag`, and wire the resulting sub-pipelines together."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "6d1585dad64464d7",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T07:00:23.770491Z",
"start_time": "2024-12-07T07:00:23.481869Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"2359pt\" height=\"346pt\"\n viewBox=\"0.00 0.00 2359.25 345.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 341.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-341.8 2355.25,-341.8 2355.25,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"10.57,-145.8 10.57,-329.8 95.43,-329.8 95.43,-145.8 10.57,-145.8\"/>\n<text text-anchor=\"middle\" x=\"53\" y=\"-312.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- model -->\n<g id=\"node1\" class=\"node\">\n<title>model</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"100,-73.8 0,-73.8 0,-23.8 106,-23.8 106,-67.8 100,-73.8\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"100,-73.8 100,-67.8\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"106,-67.8 100,-67.8\"/>\n<text text-anchor=\"start\" x=\"32.75\" y=\"-57.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">model</text>\n<text text-anchor=\"start\" x=\"8\" y=\"-29.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">RandomForest</text>\n</g>\n<!-- trained_pipeline.base_model -->\n<g id=\"node2\" class=\"node\">\n<title>trained_pipeline.base_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M820.3,-63.6C820.3,-63.6 587.95,-63.6 587.95,-63.6 581.95,-63.6 575.95,-57.6 575.95,-51.6 575.95,-51.6 575.95,-12 575.95,-12 575.95,-6 581.95,0 587.95,0 587.95,0 820.3,0 820.3,0 826.3,0 832.3,-6 832.3,-12 832.3,-12 832.3,-51.6 832.3,-51.6 832.3,-57.6 826.3,-63.6 
820.3,-63.6\"/>\n<text text-anchor=\"start\" x=\"586.75\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.base_model: model</text>\n<text text-anchor=\"start\" x=\"672.62\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- trained_pipeline.fit_model -->\n<g id=\"node10\" class=\"node\">\n<title>trained_pipeline.fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1038.9,-85.6C1038.9,-85.6 873.3,-85.6 873.3,-85.6 867.3,-85.6 861.3,-79.6 861.3,-73.6 861.3,-73.6 861.3,-34 861.3,-34 861.3,-28 867.3,-22 873.3,-22 873.3,-22 1038.9,-22 1038.9,-22 1044.9,-22 1050.9,-28 1050.9,-34 1050.9,-34 1050.9,-73.6 1050.9,-73.6 1050.9,-79.6 1044.9,-85.6 1038.9,-85.6\"/>\n<text text-anchor=\"start\" x=\"872.1\" y=\"-62.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.fit_model</text>\n<text text-anchor=\"start\" x=\"924.6\" y=\"-34.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- trained_pipeline.base_model&#45;&gt;trained_pipeline.fit_model -->\n<g id=\"edge12\" class=\"edge\">\n<title>trained_pipeline.base_model&#45;&gt;trained_pipeline.fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M832.68,-43.03C838.32,-43.52 843.96,-44.02 849.55,-44.51\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"849.15,-47.99 859.42,-45.38 849.76,-41.02 849.15,-47.99\"/>\n</g>\n<!-- fit_model -->\n<g id=\"node3\" class=\"node\">\n<title>fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1633.2,-163.6C1633.2,-163.6 1572.6,-163.6 1572.6,-163.6 1566.6,-163.6 1560.6,-157.6 1560.6,-151.6 1560.6,-151.6 1560.6,-112 1560.6,-112 1560.6,-106 1566.6,-100 1572.6,-100 1572.6,-100 1633.2,-100 1633.2,-100 1639.2,-100 1645.2,-106 1645.2,-112 1645.2,-112 1645.2,-151.6 1645.2,-151.6 1645.2,-157.6 1639.2,-163.6 1633.2,-163.6\"/>\n<text 
text-anchor=\"start\" x=\"1572.53\" y=\"-140.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n<text text-anchor=\"start\" x=\"1571.4\" y=\"-112.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- predicted_data.fit_model -->\n<g id=\"node11\" class=\"node\">\n<title>predicted_data.fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1919.3,-163.6C1919.3,-163.6 1761.2,-163.6 1761.2,-163.6 1755.2,-163.6 1749.2,-157.6 1749.2,-151.6 1749.2,-151.6 1749.2,-112 1749.2,-112 1749.2,-106 1755.2,-100 1761.2,-100 1761.2,-100 1919.3,-100 1919.3,-100 1925.3,-100 1931.3,-106 1931.3,-112 1931.3,-112 1931.3,-151.6 1931.3,-151.6 1931.3,-157.6 1925.3,-163.6 1919.3,-163.6\"/>\n<text text-anchor=\"start\" x=\"1760\" y=\"-140.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.fit_model</text>\n<text text-anchor=\"start\" x=\"1808.75\" y=\"-112.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- fit_model&#45;&gt;predicted_data.fit_model -->\n<g id=\"edge13\" class=\"edge\">\n<title>fit_model&#45;&gt;predicted_data.fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1645.43,-131.8C1671.05,-131.8 1705.1,-131.8 1737.51,-131.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1737.37,-135.3 1747.37,-131.8 1737.37,-128.3 1737.37,-135.3\"/>\n</g>\n<!-- predicted_data.raw_data -->\n<g id=\"node4\" class=\"node\">\n<title>predicted_data.raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1681.2,-245.6C1681.2,-245.6 1524.6,-245.6 1524.6,-245.6 1518.6,-245.6 1512.6,-239.6 1512.6,-233.6 1512.6,-233.6 1512.6,-194 1512.6,-194 1512.6,-188 1518.6,-182 1524.6,-182 1524.6,-182 1681.2,-182 1681.2,-182 1687.2,-182 1693.2,-188 1693.2,-194 1693.2,-194 1693.2,-233.6 1693.2,-233.6 1693.2,-239.6 1687.2,-245.6 1681.2,-245.6\"/>\n<text text-anchor=\"start\" 
x=\"1523.4\" y=\"-222.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.raw_data</text>\n<text text-anchor=\"start\" x=\"1568.78\" y=\"-194.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.transformed_data -->\n<g id=\"node5\" class=\"node\">\n<title>predicted_data.transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1946.3,-245.6C1946.3,-245.6 1734.2,-245.6 1734.2,-245.6 1728.2,-245.6 1722.2,-239.6 1722.2,-233.6 1722.2,-233.6 1722.2,-194 1722.2,-194 1722.2,-188 1728.2,-182 1734.2,-182 1734.2,-182 1946.3,-182 1946.3,-182 1952.3,-182 1958.3,-188 1958.3,-194 1958.3,-194 1958.3,-233.6 1958.3,-233.6 1958.3,-239.6 1952.3,-245.6 1946.3,-245.6\"/>\n<text text-anchor=\"start\" x=\"1733\" y=\"-222.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.transformed_data</text>\n<text text-anchor=\"start\" x=\"1806.12\" y=\"-194.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.raw_data&#45;&gt;predicted_data.transformed_data -->\n<g id=\"edge4\" class=\"edge\">\n<title>predicted_data.raw_data&#45;&gt;predicted_data.transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1693.55,-213.8C1699.17,-213.8 1704.88,-213.8 1710.62,-213.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1710.19,-217.3 1720.19,-213.8 1710.19,-210.3 1710.19,-217.3\"/>\n</g>\n<!-- predicted_data.predicted_data -->\n<g id=\"node8\" class=\"node\">\n<title>predicted_data.predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M2192.65,-204.6C2192.65,-204.6 1999.3,-204.6 1999.3,-204.6 1993.3,-204.6 1987.3,-198.6 1987.3,-192.6 1987.3,-192.6 1987.3,-153 1987.3,-153 1987.3,-147 1993.3,-141 1999.3,-141 1999.3,-141 2192.65,-141 2192.65,-141 2198.65,-141 2204.65,-147 2204.65,-153 2204.65,-153 2204.65,-192.6 2204.65,-192.6 
2204.65,-198.6 2198.65,-204.6 2192.65,-204.6\"/>\n<text text-anchor=\"start\" x=\"1998.1\" y=\"-181.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.predicted_data</text>\n<text text-anchor=\"start\" x=\"2061.85\" y=\"-153.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.transformed_data&#45;&gt;predicted_data.predicted_data -->\n<g id=\"edge8\" class=\"edge\">\n<title>predicted_data.transformed_data&#45;&gt;predicted_data.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1958.46,-194.86C1964.23,-193.93 1970.04,-192.99 1975.82,-192.05\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1976.09,-195.56 1985.4,-190.51 1974.97,-188.65 1976.09,-195.56\"/>\n</g>\n<!-- trained_pipeline.predicted_data -->\n<g id=\"node6\" class=\"node\">\n<title>trained_pipeline.predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1292.75,-145.6C1292.75,-145.6 1091.9,-145.6 1091.9,-145.6 1085.9,-145.6 1079.9,-139.6 1079.9,-133.6 1079.9,-133.6 1079.9,-94 1079.9,-94 1079.9,-88 1085.9,-82 1091.9,-82 1091.9,-82 1292.75,-82 1292.75,-82 1298.75,-82 1304.75,-88 1304.75,-94 1304.75,-94 1304.75,-133.6 1304.75,-133.6 1304.75,-139.6 1298.75,-145.6 1292.75,-145.6\"/>\n<text text-anchor=\"start\" x=\"1090.7\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.predicted_data</text>\n<text text-anchor=\"start\" x=\"1158.2\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline -->\n<g id=\"node15\" class=\"node\">\n<title>trained_pipeline</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1459.22,-110.6C1459.22,-110.6 1358.12,-110.6 1358.12,-110.6 1352.12,-110.6 1346.12,-104.6 1346.12,-98.6 1346.12,-98.6 1346.12,-59 1346.12,-59 1346.12,-53 1352.12,-47 1358.12,-47 1358.12,-47 1459.22,-47 1459.22,-47 
1465.22,-47 1471.22,-53 1471.22,-59 1471.22,-59 1471.22,-98.6 1471.22,-98.6 1471.22,-104.6 1465.22,-110.6 1459.22,-110.6\"/>\n<text text-anchor=\"start\" x=\"1356.92\" y=\"-87.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline</text>\n<text text-anchor=\"start\" x=\"1398.17\" y=\"-59.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- trained_pipeline.predicted_data&#45;&gt;trained_pipeline -->\n<g id=\"edge18\" class=\"edge\">\n<title>trained_pipeline.predicted_data&#45;&gt;trained_pipeline</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1305.21,-95.53C1315.1,-93.92 1324.93,-92.31 1334.37,-90.77\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1334.87,-94.23 1344.18,-89.17 1333.75,-87.33 1334.87,-94.23\"/>\n</g>\n<!-- trained_pipeline.transformed_data -->\n<g id=\"node7\" class=\"node\">\n<title>trained_pipeline.transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M813.92,-145.6C813.92,-145.6 594.33,-145.6 594.33,-145.6 588.33,-145.6 582.33,-139.6 582.33,-133.6 582.33,-133.6 582.33,-94 582.33,-94 582.33,-88 588.33,-82 594.33,-82 594.33,-82 813.92,-82 813.92,-82 819.92,-82 825.92,-88 825.92,-94 825.92,-94 825.92,-133.6 825.92,-133.6 825.92,-139.6 819.92,-145.6 813.92,-145.6\"/>\n<text text-anchor=\"start\" x=\"593.12\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.transformed_data</text>\n<text text-anchor=\"start\" x=\"670\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline.transformed_data&#45;&gt;trained_pipeline.predicted_data -->\n<g id=\"edge5\" class=\"edge\">\n<title>trained_pipeline.transformed_data&#45;&gt;trained_pipeline.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M826.25,-113.8C900.08,-113.8 994.18,-113.8 1068.35,-113.8\"/>\n<polygon fill=\"black\" 
stroke=\"black\" points=\"1068.1,-117.3 1078.1,-113.8 1068.1,-110.3 1068.1,-117.3\"/>\n</g>\n<!-- trained_pipeline.transformed_data&#45;&gt;trained_pipeline.fit_model -->\n<g id=\"edge11\" class=\"edge\">\n<title>trained_pipeline.transformed_data&#45;&gt;trained_pipeline.fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M826.09,-84.77C834.05,-82.86 842.03,-80.94 849.91,-79.05\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"850.58,-82.49 859.49,-76.75 848.95,-75.68 850.58,-82.49\"/>\n</g>\n<!-- predicted_data -->\n<g id=\"node13\" class=\"node\">\n<title>predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M2339.25,-204.6C2339.25,-204.6 2245.65,-204.6 2245.65,-204.6 2239.65,-204.6 2233.65,-198.6 2233.65,-192.6 2233.65,-192.6 2233.65,-153 2233.65,-153 2233.65,-147 2239.65,-141 2245.65,-141 2245.65,-141 2339.25,-141 2339.25,-141 2345.25,-141 2351.25,-147 2351.25,-153 2351.25,-153 2351.25,-192.6 2351.25,-192.6 2351.25,-198.6 2345.25,-204.6 2339.25,-204.6\"/>\n<text text-anchor=\"start\" x=\"2244.45\" y=\"-181.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data</text>\n<text text-anchor=\"start\" x=\"2258.32\" y=\"-153.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.predicted_data&#45;&gt;predicted_data -->\n<g id=\"edge15\" class=\"edge\">\n<title>predicted_data.predicted_data&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M2205.02,-172.8C2210.83,-172.8 2216.59,-172.8 2222.22,-172.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"2222.03,-176.3 2232.03,-172.8 2222.03,-169.3 2222.03,-176.3\"/>\n</g>\n<!-- trained_pipeline.model_params -->\n<g id=\"node9\" class=\"node\">\n<title>trained_pipeline.model_params</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M534.95,-63.6C534.95,-63.6 333.35,-63.6 333.35,-63.6 327.35,-63.6 321.35,-57.6 321.35,-51.6 321.35,-51.6 321.35,-12 321.35,-12 
321.35,-6 327.35,0 333.35,0 333.35,0 534.95,0 534.95,0 540.95,0 546.95,-6 546.95,-12 546.95,-12 546.95,-51.6 546.95,-51.6 546.95,-57.6 540.95,-63.6 534.95,-63.6\"/>\n<text text-anchor=\"start\" x=\"332.15\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.model_params</text>\n<text text-anchor=\"start\" x=\"423.65\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- trained_pipeline.model_params&#45;&gt;trained_pipeline.base_model -->\n<g id=\"edge1\" class=\"edge\">\n<title>trained_pipeline.model_params&#45;&gt;trained_pipeline.base_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M547.24,-31.8C552.78,-31.8 558.36,-31.8 563.96,-31.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"563.95,-35.3 573.95,-31.8 563.95,-28.3 563.95,-35.3\"/>\n</g>\n<!-- trained_pipeline.fit_model&#45;&gt;trained_pipeline.predicted_data -->\n<g id=\"edge6\" class=\"edge\">\n<title>trained_pipeline.fit_model&#45;&gt;trained_pipeline.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1051.36,-77.94C1057.01,-79.39 1062.74,-80.86 1068.49,-82.33\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1067.53,-85.7 1078.08,-84.79 1069.26,-78.92 1067.53,-85.7\"/>\n</g>\n<!-- trained_pipeline.fit_model&#45;&gt;trained_pipeline -->\n<g id=\"edge17\" class=\"edge\">\n<title>trained_pipeline.fit_model&#45;&gt;trained_pipeline</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1051.17,-58.85C1121.12,-62.63 1218.9,-67.95 1304.75,-72.8 1314.36,-73.34 1324.47,-73.92 1334.45,-74.5\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1333.93,-77.97 1344.12,-75.06 1334.34,-70.99 1333.93,-77.97\"/>\n</g>\n<!-- predicted_data.fit_model&#45;&gt;predicted_data.predicted_data -->\n<g id=\"edge9\" class=\"edge\">\n<title>predicted_data.fit_model&#45;&gt;predicted_data.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" 
d=\"M1931.78,-146.43C1946.07,-148.74 1961.03,-151.15 1975.85,-153.55\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1975.21,-156.99 1985.64,-155.13 1976.33,-150.08 1975.21,-156.99\"/>\n</g>\n<!-- predicted_data.path -->\n<g id=\"node12\" class=\"node\">\n<title>predicted_data.path</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1471.6,-245.6C1471.6,-245.6 1345.75,-245.6 1345.75,-245.6 1339.75,-245.6 1333.75,-239.6 1333.75,-233.6 1333.75,-233.6 1333.75,-194 1333.75,-194 1333.75,-188 1339.75,-182 1345.75,-182 1345.75,-182 1471.6,-182 1471.6,-182 1477.6,-182 1483.6,-188 1483.6,-194 1483.6,-194 1483.6,-233.6 1483.6,-233.6 1483.6,-239.6 1477.6,-245.6 1471.6,-245.6\"/>\n<text text-anchor=\"start\" x=\"1344.55\" y=\"-222.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.path</text>\n<text text-anchor=\"start\" x=\"1401.17\" y=\"-194.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">str</text>\n</g>\n<!-- predicted_data.path&#45;&gt;predicted_data.raw_data -->\n<g id=\"edge3\" class=\"edge\">\n<title>predicted_data.path&#45;&gt;predicted_data.raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1483.99,-213.8C1489.66,-213.8 1495.43,-213.8 1501.23,-213.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1500.86,-217.3 1510.86,-213.8 1500.86,-210.3 1500.86,-217.3\"/>\n</g>\n<!-- trained_pipeline.path -->\n<g id=\"node14\" class=\"node\">\n<title>trained_pipeline.path</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M280.35,-145.6C280.35,-145.6 147,-145.6 147,-145.6 141,-145.6 135,-139.6 135,-133.6 135,-133.6 135,-94 135,-94 135,-88 141,-82 147,-82 147,-82 280.35,-82 280.35,-82 286.35,-82 292.35,-88 292.35,-94 292.35,-94 292.35,-133.6 292.35,-133.6 292.35,-139.6 286.35,-145.6 280.35,-145.6\"/>\n<text text-anchor=\"start\" x=\"145.8\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.path</text>\n<text 
text-anchor=\"start\" x=\"206.18\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">str</text>\n</g>\n<!-- trained_pipeline.raw_data -->\n<g id=\"node17\" class=\"node\">\n<title>trained_pipeline.raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M516.2,-145.6C516.2,-145.6 352.1,-145.6 352.1,-145.6 346.1,-145.6 340.1,-139.6 340.1,-133.6 340.1,-133.6 340.1,-94 340.1,-94 340.1,-88 346.1,-82 352.1,-82 352.1,-82 516.2,-82 516.2,-82 522.2,-82 528.2,-88 528.2,-94 528.2,-94 528.2,-133.6 528.2,-133.6 528.2,-139.6 522.2,-145.6 516.2,-145.6\"/>\n<text text-anchor=\"start\" x=\"350.9\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.raw_data</text>\n<text text-anchor=\"start\" x=\"400.03\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline.path&#45;&gt;trained_pipeline.raw_data -->\n<g id=\"edge20\" class=\"edge\">\n<title>trained_pipeline.path&#45;&gt;trained_pipeline.raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M292.62,-113.8C304.23,-113.8 316.36,-113.8 328.41,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"328.4,-117.3 338.4,-113.8 328.4,-110.3 328.4,-117.3\"/>\n</g>\n<!-- trained_pipeline&#45;&gt;fit_model -->\n<g id=\"edge2\" class=\"edge\">\n<title>trained_pipeline&#45;&gt;fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1471.55,-95.86C1496.57,-102.76 1525.14,-110.64 1549.15,-117.25\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1548.15,-120.61 1558.72,-119.89 1550.01,-113.86 1548.15,-120.61\"/>\n</g>\n<!-- training_prediction -->\n<g id=\"node16\" class=\"node\">\n<title>training_prediction</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1662.83,-81.6C1662.83,-81.6 1542.98,-81.6 1542.98,-81.6 1536.98,-81.6 1530.98,-75.6 1530.98,-69.6 1530.98,-69.6 1530.98,-30 1530.98,-30 1530.98,-24 1536.98,-18 1542.98,-18 1542.98,-18 
1662.83,-18 1662.83,-18 1668.83,-18 1674.83,-24 1674.83,-30 1674.83,-30 1674.83,-69.6 1674.83,-69.6 1674.83,-75.6 1668.83,-81.6 1662.83,-81.6\"/>\n<text text-anchor=\"start\" x=\"1541.78\" y=\"-58.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_prediction</text>\n<text text-anchor=\"start\" x=\"1568.78\" y=\"-30.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline&#45;&gt;training_prediction -->\n<g id=\"edge19\" class=\"edge\">\n<title>trained_pipeline&#45;&gt;training_prediction</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1471.55,-69.46C1486.89,-67.15 1503.57,-64.63 1519.71,-62.2\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1519.91,-65.71 1529.28,-60.76 1518.87,-58.79 1519.91,-65.71\"/>\n</g>\n<!-- trained_pipeline.raw_data&#45;&gt;trained_pipeline.transformed_data -->\n<g id=\"edge7\" class=\"edge\">\n<title>trained_pipeline.raw_data&#45;&gt;trained_pipeline.transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M528.5,-113.8C542.09,-113.8 556.29,-113.8 570.47,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"570.32,-117.3 580.32,-113.8 570.32,-110.3 570.32,-117.3\"/>\n</g>\n<!-- _trained_pipeline.model_params_inputs -->\n<g id=\"node18\" class=\"node\">\n<title>_trained_pipeline.model_params_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"287.35,-54.1 140,-54.1 140,-9.5 287.35,-9.5 287.35,-54.1\"/>\n<text text-anchor=\"start\" x=\"154.8\" y=\"-26\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">model_params</text>\n<text text-anchor=\"start\" x=\"251.55\" y=\"-26\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">dict</text>\n</g>\n<!-- _trained_pipeline.model_params_inputs&#45;&gt;trained_pipeline.model_params -->\n<g id=\"edge10\" 
class=\"edge\">\n<title>_trained_pipeline.model_params_inputs&#45;&gt;trained_pipeline.model_params</title>\n<path fill=\"none\" stroke=\"black\" d=\"M287.73,-31.8C294.89,-31.8 302.3,-31.8 309.79,-31.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"309.69,-35.3 319.69,-31.8 309.69,-28.3 309.69,-35.3\"/>\n</g>\n<!-- _predicted_data.path_inputs -->\n<g id=\"node19\" class=\"node\">\n<title>_predicted_data.path_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1254.75,-236.1 1129.9,-236.1 1129.9,-191.5 1254.75,-191.5 1254.75,-236.1\"/>\n<text text-anchor=\"start\" x=\"1144.7\" y=\"-208\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">predict_path</text>\n<text text-anchor=\"start\" x=\"1224.95\" y=\"-208\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _predicted_data.path_inputs&#45;&gt;predicted_data.path -->\n<g id=\"edge14\" class=\"edge\">\n<title>_predicted_data.path_inputs&#45;&gt;predicted_data.path</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1255.04,-213.8C1275.99,-213.8 1299.78,-213.8 1322.24,-213.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1321.98,-217.3 1331.98,-213.8 1321.98,-210.3 1321.98,-217.3\"/>\n</g>\n<!-- _trained_pipeline.path_inputs -->\n<g id=\"node20\" class=\"node\">\n<title>_trained_pipeline.path_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"91.42,-136.1 14.58,-136.1 14.58,-91.5 91.42,-91.5 91.42,-136.1\"/>\n<text text-anchor=\"start\" x=\"29.38\" y=\"-108\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">path</text>\n<text text-anchor=\"start\" x=\"61.62\" y=\"-108\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _trained_pipeline.path_inputs&#45;&gt;trained_pipeline.path -->\n<g id=\"edge16\" class=\"edge\">\n<title>_trained_pipeline.path_inputs&#45;&gt;trained_pipeline.path</title>\n<path fill=\"none\" stroke=\"black\" 
d=\"M91.6,-113.8C101.36,-113.8 112.3,-113.8 123.51,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"123.25,-117.3 133.25,-113.8 123.25,-110.3 123.25,-117.3\"/>\n</g>\n<!-- config -->\n<g id=\"node21\" class=\"node\">\n<title>config</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"74,-189.8 26,-189.8 26,-153.8 80,-153.8 80,-183.8 74,-189.8\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"74,-189.8 74,-183.8\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"80,-183.8 74,-183.8\"/>\n<text text-anchor=\"middle\" x=\"53\" y=\"-166\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">config</text>\n</g>\n<!-- input -->\n<g id=\"node22\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"80,-244.1 26,-244.1 26,-207.5 80,-207.5 80,-244.1\"/>\n<text text-anchor=\"middle\" x=\"53\" y=\"-220\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node23\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M75.42,-299.1C75.42,-299.1 30.58,-299.1 30.58,-299.1 24.58,-299.1 18.58,-293.1 18.58,-287.1 18.58,-287.1 18.58,-274.5 18.58,-274.5 18.58,-268.5 24.58,-262.5 30.58,-262.5 30.58,-262.5 75.42,-262.5 75.42,-262.5 81.42,-262.5 87.42,-268.5 87.42,-274.5 87.42,-274.5 87.42,-287.1 87.42,-287.1 87.42,-293.1 81.42,-299.1 75.42,-299.1\"/>\n<text text-anchor=\"middle\" x=\"53\" y=\"-275\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14d5886a0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%cell_to_module pipeline --config '{\"model\":\"RandomForest\"}' --display\n",
"from typing import Any\n",
"\n",
"import pandas as pd\n",
"\n",
"from hamilton.function_modifiers import subdag, extract_fields, configuration, source\n",
"import features\n",
"import train\n",
"import inference\n",
"\n",
"@extract_fields(\n",
" {'fit_model': Any, 'training_prediction': pd.DataFrame}\n",
")\n",
"@subdag(\n",
" features, train, inference,\n",
" inputs={\n",
" \"path\": source(\"path\"),\n",
" \"model_params\": source(\"model_params\"),\n",
" },\n",
" # there are several ways to pass in configuration.\n",
" # config={ \n",
" # \"model\": configuration(\"model\")\n",
" # },\n",
")\n",
"def trained_pipeline(fit_model: Any, predicted_data: pd.DataFrame) -> dict:\n",
" return {'fit_model': fit_model, 'training_prediction': predicted_data}\n",
"\n",
"@subdag(\n",
" features, inference,\n",
" inputs={\n",
" \"path\": source(\"predict_path\"),\n",
" \"fit_model\": source(\"fit_model\"),\n",
" },\n",
")\n",
"def predicted_data(predicted_data: pd.DataFrame) -> pd.DataFrame:\n",
" return predicted_data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f72146c07a654ca4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T06:57:20.874962Z",
"start_time": "2024-12-07T06:57:20.643256Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n -->\n<!-- Pages: 1 -->\n<svg width=\"2423pt\" height=\"472pt\"\n viewBox=\"0.00 0.00 2422.60 472.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 468)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-468 2418.6,-468 2418.6,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"42.25,-272 42.25,-456 127.1,-456 127.1,-272 42.25,-272\"/>\n<text text-anchor=\"middle\" x=\"84.68\" y=\"-438.7\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- model -->\n<g id=\"node1\" class=\"node\">\n<title>model</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"131.68,-50 31.68,-50 31.68,0 137.68,0 137.68,-44 131.68,-50\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"131.68,-50 131.68,-44\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"137.68,-44 131.68,-44\"/>\n<text text-anchor=\"start\" x=\"64.43\" y=\"-33.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">model</text>\n<text text-anchor=\"start\" x=\"39.68\" y=\"-5.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">RandomForest</text>\n</g>\n<!-- model_params -->\n<g id=\"node2\" class=\"node\">\n<title>model_params</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"148.18,-118 15.18,-118 15.18,-68 154.18,-68 154.18,-112 148.18,-118\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"148.18,-118 148.18,-112\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"154.18,-112 148.18,-112\"/>\n<text text-anchor=\"start\" x=\"36.3\" y=\"-101.7\" 
font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">model_params</text>\n<text text-anchor=\"start\" x=\"23.18\" y=\"-73.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">{&#39;n_estimators&#39;: 100}</text>\n</g>\n<!-- trained_pipeline.base_model -->\n<g id=\"node3\" class=\"node\">\n<title>trained_pipeline.base_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M883.65,-107.8C883.65,-107.8 651.3,-107.8 651.3,-107.8 645.3,-107.8 639.3,-101.8 639.3,-95.8 639.3,-95.8 639.3,-56.2 639.3,-56.2 639.3,-50.2 645.3,-44.2 651.3,-44.2 651.3,-44.2 883.65,-44.2 883.65,-44.2 889.65,-44.2 895.65,-50.2 895.65,-56.2 895.65,-56.2 895.65,-95.8 895.65,-95.8 895.65,-101.8 889.65,-107.8 883.65,-107.8\"/>\n<text text-anchor=\"start\" x=\"650.1\" y=\"-84.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.base_model: model</text>\n<text text-anchor=\"start\" x=\"735.97\" y=\"-56.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- trained_pipeline.fit_model -->\n<g id=\"node12\" class=\"node\">\n<title>trained_pipeline.fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1102.25,-129.8C1102.25,-129.8 936.65,-129.8 936.65,-129.8 930.65,-129.8 924.65,-123.8 924.65,-117.8 924.65,-117.8 924.65,-78.2 924.65,-78.2 924.65,-72.2 930.65,-66.2 936.65,-66.2 936.65,-66.2 1102.25,-66.2 1102.25,-66.2 1108.25,-66.2 1114.25,-72.2 1114.25,-78.2 1114.25,-78.2 1114.25,-117.8 1114.25,-117.8 1114.25,-123.8 1108.25,-129.8 1102.25,-129.8\"/>\n<text text-anchor=\"start\" x=\"935.45\" y=\"-106.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.fit_model</text>\n<text text-anchor=\"start\" x=\"987.95\" y=\"-78.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- trained_pipeline.base_model&#45;&gt;trained_pipeline.fit_model 
-->\n<g id=\"edge12\" class=\"edge\">\n<title>trained_pipeline.base_model&#45;&gt;trained_pipeline.fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M896.03,-87.23C901.67,-87.72 907.31,-88.22 912.9,-88.71\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"912.5,-92.19 922.77,-89.58 913.11,-85.22 912.5,-92.19\"/>\n</g>\n<!-- fit_model -->\n<g id=\"node4\" class=\"node\">\n<title>fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1696.55,-207.8C1696.55,-207.8 1635.95,-207.8 1635.95,-207.8 1629.95,-207.8 1623.95,-201.8 1623.95,-195.8 1623.95,-195.8 1623.95,-156.2 1623.95,-156.2 1623.95,-150.2 1629.95,-144.2 1635.95,-144.2 1635.95,-144.2 1696.55,-144.2 1696.55,-144.2 1702.55,-144.2 1708.55,-150.2 1708.55,-156.2 1708.55,-156.2 1708.55,-195.8 1708.55,-195.8 1708.55,-201.8 1702.55,-207.8 1696.55,-207.8\"/>\n<text text-anchor=\"start\" x=\"1635.88\" y=\"-184.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n<text text-anchor=\"start\" x=\"1634.75\" y=\"-156.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- predicted_data.fit_model -->\n<g id=\"node13\" class=\"node\">\n<title>predicted_data.fit_model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1982.65,-207.8C1982.65,-207.8 1824.55,-207.8 1824.55,-207.8 1818.55,-207.8 1812.55,-201.8 1812.55,-195.8 1812.55,-195.8 1812.55,-156.2 1812.55,-156.2 1812.55,-150.2 1818.55,-144.2 1824.55,-144.2 1824.55,-144.2 1982.65,-144.2 1982.65,-144.2 1988.65,-144.2 1994.65,-150.2 1994.65,-156.2 1994.65,-156.2 1994.65,-195.8 1994.65,-195.8 1994.65,-201.8 1988.65,-207.8 1982.65,-207.8\"/>\n<text text-anchor=\"start\" x=\"1823.35\" y=\"-184.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.fit_model</text>\n<text text-anchor=\"start\" x=\"1872.1\" y=\"-156.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" 
font-size=\"14.00\">typing.Any</text>\n</g>\n<!-- fit_model&#45;&gt;predicted_data.fit_model -->\n<g id=\"edge13\" class=\"edge\">\n<title>fit_model&#45;&gt;predicted_data.fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1708.78,-176C1734.4,-176 1768.45,-176 1800.86,-176\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1800.72,-179.5 1810.72,-176 1800.72,-172.5 1800.72,-179.5\"/>\n</g>\n<!-- trained_pipeline.model -->\n<g id=\"node5\" class=\"node\">\n<title>trained_pipeline.model</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M157.35,-261.8C157.35,-261.8 12,-261.8 12,-261.8 6,-261.8 0,-255.8 0,-249.8 0,-249.8 0,-210.2 0,-210.2 0,-204.2 6,-198.2 12,-198.2 12,-198.2 157.35,-198.2 157.35,-198.2 163.35,-198.2 169.35,-204.2 169.35,-210.2 169.35,-210.2 169.35,-249.8 169.35,-249.8 169.35,-255.8 163.35,-261.8 157.35,-261.8\"/>\n<text text-anchor=\"start\" x=\"10.8\" y=\"-238.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.model</text>\n<text text-anchor=\"start\" x=\"17.18\" y=\"-210.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">UpstreamDependency</text>\n</g>\n<!-- predicted_data.raw_data -->\n<g id=\"node6\" class=\"node\">\n<title>predicted_data.raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1744.55,-289.8C1744.55,-289.8 1587.95,-289.8 1587.95,-289.8 1581.95,-289.8 1575.95,-283.8 1575.95,-277.8 1575.95,-277.8 1575.95,-238.2 1575.95,-238.2 1575.95,-232.2 1581.95,-226.2 1587.95,-226.2 1587.95,-226.2 1744.55,-226.2 1744.55,-226.2 1750.55,-226.2 1756.55,-232.2 1756.55,-238.2 1756.55,-238.2 1756.55,-277.8 1756.55,-277.8 1756.55,-283.8 1750.55,-289.8 1744.55,-289.8\"/>\n<text text-anchor=\"start\" x=\"1586.75\" y=\"-266.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.raw_data</text>\n<text text-anchor=\"start\" x=\"1632.12\" y=\"-238.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" 
font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.transformed_data -->\n<g id=\"node7\" class=\"node\">\n<title>predicted_data.transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M2009.65,-289.8C2009.65,-289.8 1797.55,-289.8 1797.55,-289.8 1791.55,-289.8 1785.55,-283.8 1785.55,-277.8 1785.55,-277.8 1785.55,-238.2 1785.55,-238.2 1785.55,-232.2 1791.55,-226.2 1797.55,-226.2 1797.55,-226.2 2009.65,-226.2 2009.65,-226.2 2015.65,-226.2 2021.65,-232.2 2021.65,-238.2 2021.65,-238.2 2021.65,-277.8 2021.65,-277.8 2021.65,-283.8 2015.65,-289.8 2009.65,-289.8\"/>\n<text text-anchor=\"start\" x=\"1796.35\" y=\"-266.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.transformed_data</text>\n<text text-anchor=\"start\" x=\"1869.47\" y=\"-238.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.raw_data&#45;&gt;predicted_data.transformed_data -->\n<g id=\"edge4\" class=\"edge\">\n<title>predicted_data.raw_data&#45;&gt;predicted_data.transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1756.9,-258C1762.52,-258 1768.23,-258 1773.97,-258\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1773.54,-261.5 1783.54,-258 1773.54,-254.5 1773.54,-261.5\"/>\n</g>\n<!-- predicted_data.predicted_data -->\n<g id=\"node10\" class=\"node\">\n<title>predicted_data.predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M2256,-248.8C2256,-248.8 2062.65,-248.8 2062.65,-248.8 2056.65,-248.8 2050.65,-242.8 2050.65,-236.8 2050.65,-236.8 2050.65,-197.2 2050.65,-197.2 2050.65,-191.2 2056.65,-185.2 2062.65,-185.2 2062.65,-185.2 2256,-185.2 2256,-185.2 2262,-185.2 2268,-191.2 2268,-197.2 2268,-197.2 2268,-236.8 2268,-236.8 2268,-242.8 2262,-248.8 2256,-248.8\"/>\n<text text-anchor=\"start\" x=\"2061.45\" y=\"-225.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" 
font-size=\"14.00\">predicted_data.predicted_data</text>\n<text text-anchor=\"start\" x=\"2125.2\" y=\"-197.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.transformed_data&#45;&gt;predicted_data.predicted_data -->\n<g id=\"edge8\" class=\"edge\">\n<title>predicted_data.transformed_data&#45;&gt;predicted_data.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M2021.81,-239.06C2027.58,-238.13 2033.39,-237.19 2039.17,-236.25\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"2039.44,-239.76 2048.75,-234.71 2038.32,-232.85 2039.44,-239.76\"/>\n</g>\n<!-- trained_pipeline.predicted_data -->\n<g id=\"node8\" class=\"node\">\n<title>trained_pipeline.predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1356.1,-189.8C1356.1,-189.8 1155.25,-189.8 1155.25,-189.8 1149.25,-189.8 1143.25,-183.8 1143.25,-177.8 1143.25,-177.8 1143.25,-138.2 1143.25,-138.2 1143.25,-132.2 1149.25,-126.2 1155.25,-126.2 1155.25,-126.2 1356.1,-126.2 1356.1,-126.2 1362.1,-126.2 1368.1,-132.2 1368.1,-138.2 1368.1,-138.2 1368.1,-177.8 1368.1,-177.8 1368.1,-183.8 1362.1,-189.8 1356.1,-189.8\"/>\n<text text-anchor=\"start\" x=\"1154.05\" y=\"-166.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.predicted_data</text>\n<text text-anchor=\"start\" x=\"1221.55\" y=\"-138.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline -->\n<g id=\"node17\" class=\"node\">\n<title>trained_pipeline</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1522.57,-154.8C1522.57,-154.8 1421.47,-154.8 1421.47,-154.8 1415.47,-154.8 1409.47,-148.8 1409.47,-142.8 1409.47,-142.8 1409.47,-103.2 1409.47,-103.2 1409.47,-97.2 1415.47,-91.2 1421.47,-91.2 1421.47,-91.2 1522.57,-91.2 1522.57,-91.2 1528.57,-91.2 1534.57,-97.2 1534.57,-103.2 1534.57,-103.2 1534.57,-142.8 1534.57,-142.8 1534.57,-148.8 
1528.57,-154.8 1522.57,-154.8\"/>\n<text text-anchor=\"start\" x=\"1420.27\" y=\"-131.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline</text>\n<text text-anchor=\"start\" x=\"1461.52\" y=\"-103.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- trained_pipeline.predicted_data&#45;&gt;trained_pipeline -->\n<g id=\"edge18\" class=\"edge\">\n<title>trained_pipeline.predicted_data&#45;&gt;trained_pipeline</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1368.56,-139.73C1378.45,-138.12 1388.28,-136.51 1397.72,-134.97\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1398.22,-138.43 1407.53,-133.37 1397.1,-131.53 1398.22,-138.43\"/>\n</g>\n<!-- trained_pipeline.transformed_data -->\n<g id=\"node9\" class=\"node\">\n<title>trained_pipeline.transformed_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M877.27,-189.8C877.27,-189.8 657.67,-189.8 657.67,-189.8 651.67,-189.8 645.67,-183.8 645.67,-177.8 645.67,-177.8 645.67,-138.2 645.67,-138.2 645.67,-132.2 651.67,-126.2 657.67,-126.2 657.67,-126.2 877.27,-126.2 877.27,-126.2 883.27,-126.2 889.27,-132.2 889.27,-138.2 889.27,-138.2 889.27,-177.8 889.27,-177.8 889.27,-183.8 883.27,-189.8 877.27,-189.8\"/>\n<text text-anchor=\"start\" x=\"656.47\" y=\"-166.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.transformed_data</text>\n<text text-anchor=\"start\" x=\"733.35\" y=\"-138.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline.transformed_data&#45;&gt;trained_pipeline.predicted_data -->\n<g id=\"edge5\" class=\"edge\">\n<title>trained_pipeline.transformed_data&#45;&gt;trained_pipeline.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M889.6,-158C963.43,-158 1057.53,-158 1131.7,-158\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1131.45,-161.5 1141.45,-158 
1131.45,-154.5 1131.45,-161.5\"/>\n</g>\n<!-- trained_pipeline.transformed_data&#45;&gt;trained_pipeline.fit_model -->\n<g id=\"edge11\" class=\"edge\">\n<title>trained_pipeline.transformed_data&#45;&gt;trained_pipeline.fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M889.44,-128.97C897.4,-127.06 905.38,-125.14 913.26,-123.25\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"913.93,-126.69 922.84,-120.95 912.3,-119.88 913.93,-126.69\"/>\n</g>\n<!-- predicted_data -->\n<g id=\"node15\" class=\"node\">\n<title>predicted_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M2402.6,-248.8C2402.6,-248.8 2309,-248.8 2309,-248.8 2303,-248.8 2297,-242.8 2297,-236.8 2297,-236.8 2297,-197.2 2297,-197.2 2297,-191.2 2303,-185.2 2309,-185.2 2309,-185.2 2402.6,-185.2 2402.6,-185.2 2408.6,-185.2 2414.6,-191.2 2414.6,-197.2 2414.6,-197.2 2414.6,-236.8 2414.6,-236.8 2414.6,-242.8 2408.6,-248.8 2402.6,-248.8\"/>\n<text text-anchor=\"start\" x=\"2307.8\" y=\"-225.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data</text>\n<text text-anchor=\"start\" x=\"2321.67\" y=\"-197.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- predicted_data.predicted_data&#45;&gt;predicted_data -->\n<g id=\"edge15\" class=\"edge\">\n<title>predicted_data.predicted_data&#45;&gt;predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M2268.37,-217C2274.18,-217 2279.94,-217 2285.57,-217\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"2285.38,-220.5 2295.38,-217 2285.38,-213.5 2285.38,-220.5\"/>\n</g>\n<!-- trained_pipeline.model_params -->\n<g id=\"node11\" class=\"node\">\n<title>trained_pipeline.model_params</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M598.3,-107.8C598.3,-107.8 396.7,-107.8 396.7,-107.8 390.7,-107.8 384.7,-101.8 384.7,-95.8 384.7,-95.8 384.7,-56.2 384.7,-56.2 384.7,-50.2 390.7,-44.2 396.7,-44.2 396.7,-44.2 598.3,-44.2 598.3,-44.2 
604.3,-44.2 610.3,-50.2 610.3,-56.2 610.3,-56.2 610.3,-95.8 610.3,-95.8 610.3,-101.8 604.3,-107.8 598.3,-107.8\"/>\n<text text-anchor=\"start\" x=\"395.5\" y=\"-84.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.model_params</text>\n<text text-anchor=\"start\" x=\"487\" y=\"-56.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- trained_pipeline.model_params&#45;&gt;trained_pipeline.base_model -->\n<g id=\"edge1\" class=\"edge\">\n<title>trained_pipeline.model_params&#45;&gt;trained_pipeline.base_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M610.59,-76C616.13,-76 621.71,-76 627.31,-76\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"627.3,-79.5 637.3,-76 627.3,-72.5 627.3,-79.5\"/>\n</g>\n<!-- trained_pipeline.fit_model&#45;&gt;trained_pipeline.predicted_data -->\n<g id=\"edge6\" class=\"edge\">\n<title>trained_pipeline.fit_model&#45;&gt;trained_pipeline.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1114.71,-122.14C1120.36,-123.59 1126.09,-125.06 1131.84,-126.53\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1130.88,-129.9 1141.43,-128.99 1132.61,-123.12 1130.88,-129.9\"/>\n</g>\n<!-- trained_pipeline.fit_model&#45;&gt;trained_pipeline -->\n<g id=\"edge17\" class=\"edge\">\n<title>trained_pipeline.fit_model&#45;&gt;trained_pipeline</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1114.52,-103.05C1184.47,-106.83 1282.25,-112.15 1368.1,-117 1377.71,-117.54 1387.82,-118.12 1397.8,-118.7\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1397.28,-122.17 1407.47,-119.26 1397.69,-115.19 1397.28,-122.17\"/>\n</g>\n<!-- predicted_data.fit_model&#45;&gt;predicted_data.predicted_data -->\n<g id=\"edge9\" class=\"edge\">\n<title>predicted_data.fit_model&#45;&gt;predicted_data.predicted_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1995.13,-190.63C2009.42,-192.94 2024.38,-195.35 2039.2,-197.75\"/>\n<polygon 
fill=\"black\" stroke=\"black\" points=\"2038.56,-201.19 2048.99,-199.33 2039.68,-194.28 2038.56,-201.19\"/>\n</g>\n<!-- predicted_data.path -->\n<g id=\"node14\" class=\"node\">\n<title>predicted_data.path</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1534.95,-289.8C1534.95,-289.8 1409.1,-289.8 1409.1,-289.8 1403.1,-289.8 1397.1,-283.8 1397.1,-277.8 1397.1,-277.8 1397.1,-238.2 1397.1,-238.2 1397.1,-232.2 1403.1,-226.2 1409.1,-226.2 1409.1,-226.2 1534.95,-226.2 1534.95,-226.2 1540.95,-226.2 1546.95,-232.2 1546.95,-238.2 1546.95,-238.2 1546.95,-277.8 1546.95,-277.8 1546.95,-283.8 1540.95,-289.8 1534.95,-289.8\"/>\n<text text-anchor=\"start\" x=\"1407.9\" y=\"-266.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">predicted_data.path</text>\n<text text-anchor=\"start\" x=\"1464.52\" y=\"-238.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">str</text>\n</g>\n<!-- predicted_data.path&#45;&gt;predicted_data.raw_data -->\n<g id=\"edge3\" class=\"edge\">\n<title>predicted_data.path&#45;&gt;predicted_data.raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1547.34,-258C1553.01,-258 1558.78,-258 1564.58,-258\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1564.21,-261.5 1574.21,-258 1564.21,-254.5 1564.21,-261.5\"/>\n</g>\n<!-- trained_pipeline.path -->\n<g id=\"node16\" class=\"node\">\n<title>trained_pipeline.path</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.7,-189.8C343.7,-189.8 210.35,-189.8 210.35,-189.8 204.35,-189.8 198.35,-183.8 198.35,-177.8 198.35,-177.8 198.35,-138.2 198.35,-138.2 198.35,-132.2 204.35,-126.2 210.35,-126.2 210.35,-126.2 343.7,-126.2 343.7,-126.2 349.7,-126.2 355.7,-132.2 355.7,-138.2 355.7,-138.2 355.7,-177.8 355.7,-177.8 355.7,-183.8 349.7,-189.8 343.7,-189.8\"/>\n<text text-anchor=\"start\" x=\"209.15\" y=\"-166.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.path</text>\n<text 
text-anchor=\"start\" x=\"269.53\" y=\"-138.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">str</text>\n</g>\n<!-- trained_pipeline.raw_data -->\n<g id=\"node19\" class=\"node\">\n<title>trained_pipeline.raw_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M579.55,-189.8C579.55,-189.8 415.45,-189.8 415.45,-189.8 409.45,-189.8 403.45,-183.8 403.45,-177.8 403.45,-177.8 403.45,-138.2 403.45,-138.2 403.45,-132.2 409.45,-126.2 415.45,-126.2 415.45,-126.2 579.55,-126.2 579.55,-126.2 585.55,-126.2 591.55,-132.2 591.55,-138.2 591.55,-138.2 591.55,-177.8 591.55,-177.8 591.55,-183.8 585.55,-189.8 579.55,-189.8\"/>\n<text text-anchor=\"start\" x=\"414.25\" y=\"-166.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">trained_pipeline.raw_data</text>\n<text text-anchor=\"start\" x=\"463.38\" y=\"-138.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline.path&#45;&gt;trained_pipeline.raw_data -->\n<g id=\"edge20\" class=\"edge\">\n<title>trained_pipeline.path&#45;&gt;trained_pipeline.raw_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M355.97,-158C367.58,-158 379.71,-158 391.76,-158\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"391.75,-161.5 401.75,-158 391.75,-154.5 391.75,-161.5\"/>\n</g>\n<!-- trained_pipeline&#45;&gt;fit_model -->\n<g id=\"edge2\" class=\"edge\">\n<title>trained_pipeline&#45;&gt;fit_model</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1534.9,-140.06C1559.92,-146.96 1588.49,-154.84 1612.5,-161.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1611.5,-164.81 1622.07,-164.09 1613.36,-158.06 1611.5,-164.81\"/>\n</g>\n<!-- training_prediction -->\n<g id=\"node18\" class=\"node\">\n<title>training_prediction</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1726.17,-125.8C1726.17,-125.8 1606.33,-125.8 1606.33,-125.8 1600.33,-125.8 1594.33,-119.8 1594.33,-113.8 1594.33,-113.8 
1594.33,-74.2 1594.33,-74.2 1594.33,-68.2 1600.33,-62.2 1606.33,-62.2 1606.33,-62.2 1726.17,-62.2 1726.17,-62.2 1732.17,-62.2 1738.17,-68.2 1738.17,-74.2 1738.17,-74.2 1738.17,-113.8 1738.17,-113.8 1738.17,-119.8 1732.17,-125.8 1726.17,-125.8\"/>\n<text text-anchor=\"start\" x=\"1605.12\" y=\"-102.7\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_prediction</text>\n<text text-anchor=\"start\" x=\"1632.12\" y=\"-74.7\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- trained_pipeline&#45;&gt;training_prediction -->\n<g id=\"edge19\" class=\"edge\">\n<title>trained_pipeline&#45;&gt;training_prediction</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1534.9,-113.66C1550.24,-111.35 1566.92,-108.83 1583.06,-106.4\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1583.26,-109.91 1592.63,-104.96 1582.22,-102.99 1583.26,-109.91\"/>\n</g>\n<!-- trained_pipeline.raw_data&#45;&gt;trained_pipeline.transformed_data -->\n<g id=\"edge7\" class=\"edge\">\n<title>trained_pipeline.raw_data&#45;&gt;trained_pipeline.transformed_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M591.85,-158C605.44,-158 619.64,-158 633.82,-158\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"633.67,-161.5 643.67,-158 633.67,-154.5 633.67,-161.5\"/>\n</g>\n<!-- _trained_pipeline.model_params_inputs -->\n<g id=\"node20\" class=\"node\">\n<title>_trained_pipeline.model_params_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"350.7,-98.3 203.35,-98.3 203.35,-53.7 350.7,-53.7 350.7,-98.3\"/>\n<text text-anchor=\"start\" x=\"218.15\" y=\"-70.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">model_params</text>\n<text text-anchor=\"start\" x=\"314.9\" y=\"-70.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">dict</text>\n</g>\n<!-- _trained_pipeline.model_params_inputs&#45;&gt;trained_pipeline.model_params -->\n<g id=\"edge10\" 
class=\"edge\">\n<title>_trained_pipeline.model_params_inputs&#45;&gt;trained_pipeline.model_params</title>\n<path fill=\"none\" stroke=\"black\" d=\"M351.08,-76C358.24,-76 365.65,-76 373.14,-76\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"373.04,-79.5 383.04,-76 373.04,-72.5 373.04,-79.5\"/>\n</g>\n<!-- _predicted_data.path_inputs -->\n<g id=\"node21\" class=\"node\">\n<title>_predicted_data.path_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1318.1,-280.3 1193.25,-280.3 1193.25,-235.7 1318.1,-235.7 1318.1,-280.3\"/>\n<text text-anchor=\"start\" x=\"1208.05\" y=\"-252.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">predict_path</text>\n<text text-anchor=\"start\" x=\"1288.3\" y=\"-252.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _predicted_data.path_inputs&#45;&gt;predicted_data.path -->\n<g id=\"edge14\" class=\"edge\">\n<title>_predicted_data.path_inputs&#45;&gt;predicted_data.path</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1318.39,-258C1339.34,-258 1363.13,-258 1385.59,-258\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1385.33,-261.5 1395.33,-258 1385.33,-254.5 1385.33,-261.5\"/>\n</g>\n<!-- _trained_pipeline.path_inputs -->\n<g id=\"node22\" class=\"node\">\n<title>_trained_pipeline.path_inputs</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"123.1,-180.3 46.25,-180.3 46.25,-135.7 123.1,-135.7 123.1,-180.3\"/>\n<text text-anchor=\"start\" x=\"61.05\" y=\"-152.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">path</text>\n<text text-anchor=\"start\" x=\"93.3\" y=\"-152.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _trained_pipeline.path_inputs&#45;&gt;trained_pipeline.path -->\n<g id=\"edge16\" class=\"edge\">\n<title>_trained_pipeline.path_inputs&#45;&gt;trained_pipeline.path</title>\n<path fill=\"none\" stroke=\"black\" d=\"M123.28,-158C141.54,-158 
164.33,-158 186.69,-158\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"186.44,-161.5 196.44,-158 186.44,-154.5 186.44,-161.5\"/>\n</g>\n<!-- config -->\n<g id=\"node23\" class=\"node\">\n<title>config</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" points=\"105.68,-316 57.68,-316 57.68,-280 111.68,-280 111.68,-310 105.68,-316\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"105.68,-316 105.68,-310\"/>\n<polyline fill=\"none\" stroke=\"black\" points=\"111.68,-310 105.68,-310\"/>\n<text text-anchor=\"middle\" x=\"84.68\" y=\"-292.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">config</text>\n</g>\n<!-- input -->\n<g id=\"node24\" class=\"node\">\n<title>input</title>\n<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"111.68,-370.3 57.68,-370.3 57.68,-333.7 111.68,-333.7 111.68,-370.3\"/>\n<text text-anchor=\"middle\" x=\"84.68\" y=\"-346.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node25\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M107.1,-425.3C107.1,-425.3 62.25,-425.3 62.25,-425.3 56.25,-425.3 50.25,-419.3 50.25,-413.3 50.25,-413.3 50.25,-400.7 50.25,-400.7 50.25,-394.7 56.25,-388.7 62.25,-388.7 62.25,-388.7 107.1,-388.7 107.1,-388.7 113.1,-388.7 119.1,-394.7 119.1,-400.7 119.1,-400.7 119.1,-413.3 119.1,-413.3 119.1,-419.3 113.1,-425.3 107.1,-425.3\"/>\n<text text-anchor=\"middle\" x=\"84.68\" y=\"-401.2\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.graphs.Digraph at 0x14d647c40>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from hamilton import driver\n",
"\n",
"dr = (\n",
" driver.Builder()\n",
" .with_config({\"model\": \"RandomForest\", \"model_params\": {\"n_estimators\": 100}})\n",
" .with_modules(pipeline)\n",
" .build()\n",
")\n",
"dr.display_all_functions()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b3abca24b1a86329",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-07T06:57:34.959772Z",
"start_time": "2024-12-07T06:57:34.956204Z"
}
},
"outputs": [],
"source": [
"# this wont work because we don't actually have data...\n",
"# dr.execute([\"trained_pipeline\", \"predicted_data\"],\n",
"# inputs={\"path\": \"data.csv\", \"predict_path\": \"data.csv\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b3dba37a6c00d7c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}