blob: 71b418fdd82ee1d1bc768bc56010dedd4e1504a9 [file] [log] [blame]
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "dc0e298d",
"metadata": {},
"outputs": [],
"source": [
"# Execute this cell to install dependencies.\n",
"# openlineage-python supplies the `openlineage.client` package imported further down.\n",
"%pip install sf-hamilton[visualization] openlineage-python"
]
},
{
"cell_type": "markdown",
"id": "f937d835",
"metadata": {},
"source": [
"# OpenLineage example pipeline [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/openlineage/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/dagworks-inc/hamilton/blob/main/examples/openlineage/notebook.ipynb)\n",
"\n",
"\n",
"This is a simple example of a pipeline that reads data from a file and a database, joins them, fits a model, and saves the model to a file and the joined data to a database. The pipeline does not import OpenLineage, and doesn't need to know about it. The salient point is that metadata is exposed by the data loading and data\n",
"saving functions. This is what is used to populate OpenLineage events."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab02bddb",
"metadata": {},
"outputs": [],
"source": [
"%load_ext hamilton.plugins.jupyter_magic"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2ccc7699",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-06T17:30:38.628295Z",
"start_time": "2024-09-06T17:30:38.182382Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"1233pt\" height=\"387pt\"\n",
" viewBox=\"0.00 0.00 1232.70 386.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 382.73)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-382.73 1228.7,-382.73 1228.7,4 -4,4\"/>\n",
"<g id=\"clust1\" class=\"cluster\">\n",
"<title>cluster__legend</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.75,-181.72 8.75,-370.73 116.85,-370.73 116.85,-181.72 8.75,-181.72\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-353.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n",
"</g>\n",
"<!-- purchase_dataset.loader -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>purchase_dataset.loader</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-83.89 294.73,-87.45 244.9,-87.45 195.07,-87.45 154.6,-83.89 154.6,-79.5 154.6,-79.5 154.6,-7.95 154.6,-7.95 154.6,-3.56 195.07,0 244.9,0 294.73,0 335.2,-3.56 335.2,-7.95 335.2,-7.95 335.2,-79.5 335.2,-79.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-75.11 294.73,-71.55 244.9,-71.55 195.07,-71.55 154.6,-75.11 154.6,-79.5\"/>\n",
"<text text-anchor=\"start\" x=\"165.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset.loader</text>\n",
"<text text-anchor=\"start\" x=\"186.77\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">purchase_dataset()</text>\n",
"</g>\n",
"<!-- purchase_dataset -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>purchase_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M488.55,-86.52C488.55,-86.52 376.2,-86.52 376.2,-86.52 370.2,-86.52 364.2,-80.52 364.2,-74.52 364.2,-74.52 364.2,-34.92 364.2,-34.92 364.2,-28.92 370.2,-22.93 376.2,-22.93 376.2,-22.93 488.55,-22.93 488.55,-22.93 494.55,-22.93 500.55,-28.93 500.55,-34.92 500.55,-34.92 500.55,-74.53 500.55,-74.53 500.55,-80.52 494.55,-86.52 488.55,-86.52\"/>\n",
"<text text-anchor=\"start\" x=\"375\" y=\"-63.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset</text>\n",
"<text text-anchor=\"start\" x=\"398.25\" y=\"-35.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- purchase_dataset.loader&#45;&gt;purchase_dataset -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>purchase_dataset.loader&#45;&gt;purchase_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M335.65,-49.05C341.3,-49.38 346.97,-49.72 352.57,-50.05\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"352.24,-53.54 362.43,-50.64 352.66,-46.55 352.24,-53.54\"/>\n",
"</g>\n",
"<!-- saved_to_db -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>saved_to_db</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-260.89 1080.18,-264.45 1050.42,-264.45 1020.67,-264.45 996.5,-260.89 996.5,-256.5 996.5,-256.5 996.5,-184.95 996.5,-184.95 996.5,-180.56 1020.67,-177 1050.42,-177 1080.18,-177 1104.35,-180.56 1104.35,-184.95 1104.35,-184.95 1104.35,-256.5 1104.35,-256.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-252.11 1080.18,-248.55 1050.42,-248.55 1020.67,-248.55 996.5,-252.11 996.5,-256.5\"/>\n",
"<text text-anchor=\"start\" x=\"1009.17\" y=\"-229.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_to_db</text>\n",
"<text text-anchor=\"start\" x=\"1007.3\" y=\"-201.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_to_db()</text>\n",
"</g>\n",
"<!-- user_dataset.loader -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>user_dataset.loader</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-188.89 286.04,-192.45 244.9,-192.45 203.76,-192.45 170.35,-188.89 170.35,-184.5 170.35,-184.5 170.35,-112.95 170.35,-112.95 170.35,-108.56 203.76,-105 244.9,-105 286.04,-105 319.45,-108.56 319.45,-112.95 319.45,-112.95 319.45,-184.5 319.45,-184.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-180.11 286.04,-176.55 244.9,-176.55 203.76,-176.55 170.35,-180.11 170.35,-184.5\"/>\n",
"<text text-anchor=\"start\" x=\"181.15\" y=\"-157.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset.loader</text>\n",
"<text text-anchor=\"start\" x=\"201.4\" y=\"-129.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">user_dataset()</text>\n",
"</g>\n",
"<!-- user_dataset -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>user_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M472.8,-178.52C472.8,-178.52 391.95,-178.52 391.95,-178.52 385.95,-178.52 379.95,-172.52 379.95,-166.52 379.95,-166.52 379.95,-126.92 379.95,-126.92 379.95,-120.92 385.95,-114.92 391.95,-114.92 391.95,-114.92 472.8,-114.92 472.8,-114.92 478.8,-114.92 484.8,-120.92 484.8,-126.92 484.8,-126.92 484.8,-166.52 484.8,-166.52 484.8,-172.52 478.8,-178.52 472.8,-178.52\"/>\n",
"<text text-anchor=\"start\" x=\"390.75\" y=\"-155.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset</text>\n",
"<text text-anchor=\"start\" x=\"398.25\" y=\"-127.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- user_dataset.loader&#45;&gt;user_dataset -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>user_dataset.loader&#45;&gt;user_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M319.74,-147.93C335.83,-147.76 352.72,-147.57 368.35,-147.4\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"368.01,-150.91 377.97,-147.3 367.93,-143.91 368.01,-150.91\"/>\n",
"</g>\n",
"<!-- transformed_user_dataset -->\n",
"<g id=\"node9\" class=\"node\">\n",
"<title>transformed_user_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M725.15,-177.52C725.15,-177.52 557.3,-177.52 557.3,-177.52 551.3,-177.52 545.3,-171.52 545.3,-165.52 545.3,-165.52 545.3,-125.92 545.3,-125.92 545.3,-119.92 551.3,-113.92 557.3,-113.92 557.3,-113.92 725.15,-113.92 725.15,-113.92 731.15,-113.92 737.15,-119.92 737.15,-125.92 737.15,-125.92 737.15,-165.52 737.15,-165.52 737.15,-171.52 731.15,-177.52 725.15,-177.52\"/>\n",
"<text text-anchor=\"start\" x=\"556.1\" y=\"-154.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_user_dataset</text>\n",
"<text text-anchor=\"start\" x=\"607.1\" y=\"-126.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- user_dataset&#45;&gt;transformed_user_dataset -->\n",
"<g id=\"edge11\" class=\"edge\">\n",
"<title>user_dataset&#45;&gt;transformed_user_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M485.04,-146.48C499.96,-146.4 516.82,-146.32 533.78,-146.24\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"533.43,-149.74 543.41,-146.19 533.4,-142.74 533.43,-149.74\"/>\n",
"</g>\n",
"<!-- transformed_purchase_dataset -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>transformed_purchase_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M740.9,-92.52C740.9,-92.52 541.55,-92.52 541.55,-92.52 535.55,-92.52 529.55,-86.52 529.55,-80.52 529.55,-80.52 529.55,-40.92 529.55,-40.92 529.55,-34.92 535.55,-28.93 541.55,-28.93 541.55,-28.93 740.9,-28.93 740.9,-28.93 746.9,-28.93 752.9,-34.92 752.9,-40.92 752.9,-40.92 752.9,-80.53 752.9,-80.53 752.9,-86.52 746.9,-92.52 740.9,-92.52\"/>\n",
"<text text-anchor=\"start\" x=\"540.35\" y=\"-69.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_purchase_dataset</text>\n",
"<text text-anchor=\"start\" x=\"607.1\" y=\"-41.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- purchase_dataset&#45;&gt;transformed_purchase_dataset -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>purchase_dataset&#45;&gt;transformed_purchase_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M500.82,-56.68C506.42,-56.84 512.19,-57.01 518.04,-57.18\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"517.75,-60.67 527.85,-57.47 517.95,-53.68 517.75,-60.67\"/>\n",
"</g>\n",
"<!-- joined_dataset -->\n",
"<g id=\"node10\" class=\"node\">\n",
"<title>joined_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M920.75,-133.52C920.75,-133.52 828.65,-133.52 828.65,-133.52 822.65,-133.52 816.65,-127.52 816.65,-121.52 816.65,-121.52 816.65,-81.92 816.65,-81.92 816.65,-75.92 822.65,-69.92 828.65,-69.92 828.65,-69.92 920.75,-69.92 920.75,-69.92 926.75,-69.92 932.75,-75.92 932.75,-81.92 932.75,-81.92 932.75,-121.52 932.75,-121.52 932.75,-127.52 926.75,-133.52 920.75,-133.52\"/>\n",
"<text text-anchor=\"start\" x=\"827.45\" y=\"-110.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">joined_dataset</text>\n",
"<text text-anchor=\"start\" x=\"840.57\" y=\"-82.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- transformed_purchase_dataset&#45;&gt;joined_dataset -->\n",
"<g id=\"edge13\" class=\"edge\">\n",
"<title>transformed_purchase_dataset&#45;&gt;joined_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M753.22,-80.38C770.94,-83.52 788.81,-86.69 805.17,-89.59\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"804.28,-92.98 814.74,-91.28 805.5,-86.09 804.28,-92.98\"/>\n",
"</g>\n",
"<!-- saved_file -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>saved_file</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-140.89 1204.23,-144.45 1179.02,-144.45 1153.82,-144.45 1133.35,-140.89 1133.35,-136.5 1133.35,-136.5 1133.35,-64.95 1133.35,-64.95 1133.35,-60.56 1153.82,-57 1179.02,-57 1204.23,-57 1224.7,-60.56 1224.7,-64.95 1224.7,-64.95 1224.7,-136.5 1224.7,-136.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-132.11 1204.23,-128.55 1179.02,-128.55 1153.82,-128.55 1133.35,-132.11 1133.35,-136.5\"/>\n",
"<text text-anchor=\"start\" x=\"1146.4\" y=\"-109.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_file</text>\n",
"<text text-anchor=\"start\" x=\"1144.15\" y=\"-81.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_file()</text>\n",
"</g>\n",
"<!-- fit_model -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>fit_model</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1087.85,-96.52C1087.85,-96.52 1013,-96.52 1013,-96.52 1007,-96.52 1001,-90.52 1001,-84.52 1001,-84.52 1001,-44.92 1001,-44.92 1001,-38.92 1007,-32.92 1013,-32.92 1013,-32.92 1087.85,-32.92 1087.85,-32.92 1093.85,-32.92 1099.85,-38.92 1099.85,-44.92 1099.85,-44.92 1099.85,-84.53 1099.85,-84.53 1099.85,-90.52 1093.85,-96.52 1087.85,-96.52\"/>\n",
"<text text-anchor=\"start\" x=\"1020.05\" y=\"-73.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n",
"<text text-anchor=\"start\" x=\"1011.8\" y=\"-45.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ModelObject</text>\n",
"</g>\n",
"<!-- fit_model&#45;&gt;saved_file -->\n",
"<g id=\"edge8\" class=\"edge\">\n",
"<title>fit_model&#45;&gt;saved_file</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1100.01,-78.54C1107.23,-80.59 1114.73,-82.73 1122.09,-84.82\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1121.07,-88.17 1131.65,-87.54 1122.98,-81.43 1121.07,-88.17\"/>\n",
"</g>\n",
"<!-- transformed_user_dataset&#45;&gt;joined_dataset -->\n",
"<g id=\"edge12\" class=\"edge\">\n",
"<title>transformed_user_dataset&#45;&gt;joined_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M737.38,-127.64C760.1,-123.32 783.92,-118.79 805.18,-114.75\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"805.71,-118.21 814.88,-112.91 804.4,-111.33 805.71,-118.21\"/>\n",
"</g>\n",
"<!-- joined_dataset&#45;&gt;saved_to_db -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>joined_dataset&#45;&gt;saved_to_db</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M922.71,-133.93C942.5,-147.48 965.8,-163.44 986.99,-177.96\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"984.76,-180.68 994.99,-183.44 988.72,-174.9 984.76,-180.68\"/>\n",
"</g>\n",
"<!-- joined_dataset&#45;&gt;fit_model -->\n",
"<g id=\"edge10\" class=\"edge\">\n",
"<title>joined_dataset&#45;&gt;fit_model</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M933.07,-89.51C951.15,-85.65 971.18,-81.39 989.44,-77.5\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"990.02,-80.95 999.07,-75.45 988.57,-74.11 990.02,-80.95\"/>\n",
"</g>\n",
"<!-- _purchase_dataset.loader_inputs -->\n",
"<g id=\"node11\" class=\"node\">\n",
"<title>_purchase_dataset.loader_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"125.6,-66.02 0,-66.02 0,-21.42 125.6,-21.42 125.6,-66.02\"/>\n",
"<text text-anchor=\"start\" x=\"14.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n",
"<text text-anchor=\"start\" x=\"74.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n",
"</g>\n",
"<!-- _purchase_dataset.loader_inputs&#45;&gt;purchase_dataset.loader -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>_purchase_dataset.loader_inputs&#45;&gt;purchase_dataset.loader</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M126.03,-43.72C131.61,-43.72 137.34,-43.72 143.14,-43.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"142.81,-47.23 152.81,-43.73 142.81,-40.23 142.81,-47.23\"/>\n",
"</g>\n",
"<!-- _saved_to_db_inputs -->\n",
"<g id=\"node12\" class=\"node\">\n",
"<title>_saved_to_db_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"967.5,-253.52 781.9,-253.52 781.9,-187.93 967.5,-187.93 967.5,-253.52\"/>\n",
"<text text-anchor=\"start\" x=\"826.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n",
"<text text-anchor=\"start\" x=\"916.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n",
"<text text-anchor=\"start\" x=\"796.32\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">joined_table_name</text>\n",
"<text text-anchor=\"start\" x=\"927.2\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _saved_to_db_inputs&#45;&gt;saved_to_db -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>_saved_to_db_inputs&#45;&gt;saved_to_db</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M967.9,-220.72C973.69,-220.72 979.44,-220.72 985.06,-220.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"984.86,-224.23 994.86,-220.73 984.86,-217.23 984.86,-224.23\"/>\n",
"</g>\n",
"<!-- _user_dataset.loader_inputs -->\n",
"<g id=\"node13\" class=\"node\">\n",
"<title>_user_dataset.loader_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"124.47,-171.02 1.12,-171.02 1.12,-126.42 124.47,-126.42 124.47,-171.02\"/>\n",
"<text text-anchor=\"start\" x=\"15.92\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_ds_path</text>\n",
"<text text-anchor=\"start\" x=\"94.67\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _user_dataset.loader_inputs&#45;&gt;user_dataset.loader -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>_user_dataset.loader_inputs&#45;&gt;user_dataset.loader</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M124.77,-148.72C135.64,-148.72 147.14,-148.72 158.54,-148.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"158.37,-152.23 168.37,-148.73 158.37,-145.23 158.37,-152.23\"/>\n",
"</g>\n",
"<!-- _saved_file_inputs -->\n",
"<g id=\"node14\" class=\"node\">\n",
"<title>_saved_file_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1101.22,-159.02 999.62,-159.02 999.62,-114.42 1101.22,-114.42 1101.22,-159.02\"/>\n",
"<text text-anchor=\"start\" x=\"1014.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_path</text>\n",
"<text text-anchor=\"start\" x=\"1071.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _saved_file_inputs&#45;&gt;saved_file -->\n",
"<g id=\"edge9\" class=\"edge\">\n",
"<title>_saved_file_inputs&#45;&gt;saved_file</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1101.46,-122.5C1108.26,-120.56 1115.28,-118.57 1122.18,-116.61\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1122.76,-120.08 1131.42,-113.98 1120.84,-113.35 1122.76,-120.08\"/>\n",
"</g>\n",
"<!-- input -->\n",
"<g id=\"node15\" class=\"node\">\n",
"<title>input</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.8,-226.02 35.8,-226.02 35.8,-189.43 89.8,-189.43 89.8,-226.02\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-201.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n",
"</g>\n",
"<!-- function -->\n",
"<g id=\"node16\" class=\"node\">\n",
"<title>function</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M85.22,-281.03C85.22,-281.03 40.37,-281.03 40.37,-281.03 34.37,-281.03 28.37,-275.03 28.37,-269.03 28.37,-269.03 28.37,-256.43 28.37,-256.43 28.37,-250.43 34.37,-244.43 40.37,-244.43 40.37,-244.43 85.22,-244.43 85.22,-244.43 91.22,-244.43 97.22,-250.43 97.22,-256.43 97.22,-256.43 97.22,-269.03 97.22,-269.03 97.22,-275.03 91.22,-281.03 85.22,-281.03\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-256.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n",
"</g>\n",
"<!-- materializer -->\n",
"<g id=\"node17\" class=\"node\">\n",
"<title>materializer</title>\n",
"<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-338.29 88.21,-339.94 62.8,-339.94 37.39,-339.94 16.75,-338.29 16.75,-336.26 16.75,-336.26 16.75,-303.19 16.75,-303.19 16.75,-301.16 37.39,-299.51 62.8,-299.51 88.21,-299.51 108.85,-301.16 108.85,-303.19 108.85,-303.19 108.85,-336.26 108.85,-336.26\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-334.23 88.21,-332.59 62.8,-332.59 37.39,-332.59 16.75,-334.23 16.75,-336.26\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-313.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x1575446d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%cell_to_module pipeline --display\n",
"\n",
"import pickle\n",
"from typing import Tuple\n",
"\n",
"import pandas as pd\n",
"\n",
"from hamilton.function_modifiers import dataloader, datasaver\n",
"from hamilton.io import utils\n",
"\n",
"@dataloader()\n",
"def user_dataset(file_ds_path: str) -> Tuple[pd.DataFrame, dict]:\n",
"    \"\"\"Read the user CSV and report metadata about the file and the loaded frame.\n",
"\n",
"    :param file_ds_path: Path of the CSV file to read.\n",
"    :return: The loaded DataFrame paired with its file/dataframe metadata dict.\n",
"    \"\"\"\n",
"    users = pd.read_csv(file_ds_path)\n",
"    load_metadata = utils.get_file_and_dataframe_metadata(file_ds_path, users)\n",
"    return users, load_metadata\n",
"\n",
"\n",
"@dataloader()\n",
"def purchase_dataset(db_client: object) -> Tuple[pd.DataFrame, dict]:\n",
"    \"\"\"Load every row of the `purchase_data` table and describe where it came from.\n",
"\n",
"    :param db_client: Connection passed to `pd.read_sql` as `con`.\n",
"    :return: The query result paired with a metadata dict holding the SQL details\n",
"        plus the dataframe metadata.\n",
"    \"\"\"\n",
"    query = \"SELECT * FROM purchase_data\"\n",
"    purchases = pd.read_sql(query, con=db_client)\n",
"    sql_details = {\"query\": query, \"table_name\": \"purchase_data\", \"database\": \"sqlite\"}\n",
"    # Dataframe metadata is merged in last, so its keys win on any collision.\n",
"    return purchases, {\"sql_metadata\": sql_details, **utils.get_dataframe_metadata(purchases)}\n",
"\n",
"\n",
"def transformed_user_dataset(user_dataset: pd.DataFrame) -> pd.DataFrame:\n",
"    \"\"\"Placeholder transform: hands the user dataset through unchanged.\"\"\"\n",
"    transformed = user_dataset\n",
"    return transformed\n",
"\n",
"\n",
"def transformed_purchase_dataset(purchase_dataset: pd.DataFrame) -> pd.DataFrame:\n",
"    \"\"\"Placeholder transform: hands the purchase dataset through unchanged.\"\"\"\n",
"    transformed = purchase_dataset\n",
"    return transformed\n",
"\n",
"\n",
"def joined_dataset(\n",
"    transformed_user_dataset: pd.DataFrame, transformed_purchase_dataset: pd.DataFrame\n",
") -> pd.DataFrame:\n",
"    \"\"\"Join users to their purchases on user `id` == purchase `user_id`.\n",
"\n",
"    :param transformed_user_dataset: User rows; joined on its `id` column.\n",
"    :param transformed_purchase_dataset: Purchase rows; joined on its `user_id` column.\n",
"    :return: The merged frame without the `id_x` / `id_y` columns.\n",
"    \"\"\"\n",
"    merged = transformed_user_dataset.merge(\n",
"        transformed_purchase_dataset, left_on=\"id\", right_on=\"user_id\"\n",
"    )\n",
"    # `id_x` / `id_y` are merge's default-suffixed copies of the inputs' `id` columns.\n",
"    return merged.drop(columns=[\"id_x\", \"id_y\"])\n",
"\n",
"\n",
"class ModelObject:\n",
"    \"\"\"Minimal stand-in model with a toy `predict`.\"\"\"\n",
"\n",
"    def __init__(self):\n",
"        \"\"\"No state to set up.\"\"\"\n",
"\n",
"    def predict(self, data):\n",
"        \"\"\"Return `data + 1`.\"\"\"\n",
"        incremented = data + 1\n",
"        return incremented\n",
"\n",
"\n",
"def fit_model(joined_dataset: pd.DataFrame) -> ModelObject:\n",
"    \"\"\"Return a fresh `ModelObject`; `joined_dataset` is not used for any fitting here.\n",
"\n",
"    This is where a real pipeline would train its model.\n",
"    \"\"\"\n",
"    placeholder_model = ModelObject()\n",
"    return placeholder_model\n",
"\n",
"\n",
"@datasaver()\n",
"def saved_file(fit_model: ModelObject, file_path: str) -> dict:\n",
"    \"\"\"Pickle the model to `file_path` and return metadata about the written file.\n",
"\n",
"    :param fit_model: Model object to serialize.\n",
"    :param file_path: Destination path; opened in binary write mode.\n",
"    :return: File metadata dict from `utils.get_file_metadata`.\n",
"    \"\"\"\n",
"    with open(file_path, mode=\"wb\") as sink:\n",
"        pickle.dump(fit_model, sink)\n",
"    file_metadata = utils.get_file_metadata(file_path)\n",
"    return file_metadata\n",
"\n",
"\n",
"@datasaver()\n",
"def saved_to_db(joined_dataset: pd.DataFrame, db_client: object, joined_table_name: str) -> dict:\n",
"    \"\"\"Write the joined data to a SQL table, replacing it if it exists, and return metadata.\n",
"\n",
"    :param joined_dataset: Frame to persist (the index is not written).\n",
"    :param db_client: Connection passed to `DataFrame.to_sql` as `con`.\n",
"    :param joined_table_name: Name of the destination table.\n",
"    :return: SQL metadata merged with dataframe metadata.\n",
"    \"\"\"\n",
"    joined_dataset.to_sql(joined_table_name, con=db_client, index=False, if_exists=\"replace\")\n",
"    # Dataframe metadata is merged in last, so its keys win on any collision.\n",
"    return {\n",
"        **utils.get_sql_metadata(joined_table_name, joined_dataset),\n",
"        **utils.get_dataframe_metadata(joined_dataset),\n",
"    }"
]
},
{
"cell_type": "markdown",
"id": "50a7f746",
"metadata": {},
"source": [
"# Create OpenLineage client"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f7dcecc8",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-06T17:31:52.088986Z",
"start_time": "2024-09-06T17:31:51.984490Z"
}
},
"outputs": [],
"source": [
"from openlineage.client import OpenLineageClient\n",
"from openlineage.client.transport.file import FileConfig, FileTransport\n",
"\n",
"# Without a running OpenLineage server, events can be appended to a local file instead.\n",
"file_config = FileConfig(log_file_path=\"pipeline.json\", append=True)\n",
"\n",
"# With a running OpenLineage server (e.g. Marquez), uncomment the next line and\n",
"# remove the file-based client below it, which would otherwise overwrite `client`.\n",
"# client = OpenLineageClient(url=\"http://localhost:9000\")\n",
"client = OpenLineageClient(transport=FileTransport(file_config))"
]
},
{
"cell_type": "markdown",
"id": "4a49f34a",
"metadata": {},
"source": [
"# Create Hamilton DAG with OpenLineage Adapter"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6db87906",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-06T17:32:31.294469Z",
"start_time": "2024-09-06T17:32:30.852466Z"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n",
" -->\n",
"<!-- Pages: 1 -->\n",
"<svg width=\"1233pt\" height=\"387pt\"\n",
" viewBox=\"0.00 0.00 1232.70 386.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 382.73)\">\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-382.73 1228.7,-382.73 1228.7,4 -4,4\"/>\n",
"<g id=\"clust1\" class=\"cluster\">\n",
"<title>cluster__legend</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.75,-181.72 8.75,-370.73 116.85,-370.73 116.85,-181.72 8.75,-181.72\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-353.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n",
"</g>\n",
"<!-- purchase_dataset.loader -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>purchase_dataset.loader</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-83.89 294.73,-87.45 244.9,-87.45 195.07,-87.45 154.6,-83.89 154.6,-79.5 154.6,-79.5 154.6,-7.95 154.6,-7.95 154.6,-3.56 195.07,0 244.9,0 294.73,0 335.2,-3.56 335.2,-7.95 335.2,-7.95 335.2,-79.5 335.2,-79.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-75.11 294.73,-71.55 244.9,-71.55 195.07,-71.55 154.6,-75.11 154.6,-79.5\"/>\n",
"<text text-anchor=\"start\" x=\"165.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset.loader</text>\n",
"<text text-anchor=\"start\" x=\"186.77\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">purchase_dataset()</text>\n",
"</g>\n",
"<!-- purchase_dataset -->\n",
"<g id=\"node5\" class=\"node\">\n",
"<title>purchase_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M488.55,-86.52C488.55,-86.52 376.2,-86.52 376.2,-86.52 370.2,-86.52 364.2,-80.52 364.2,-74.52 364.2,-74.52 364.2,-34.92 364.2,-34.92 364.2,-28.92 370.2,-22.93 376.2,-22.93 376.2,-22.93 488.55,-22.93 488.55,-22.93 494.55,-22.93 500.55,-28.93 500.55,-34.92 500.55,-34.92 500.55,-74.53 500.55,-74.53 500.55,-80.52 494.55,-86.52 488.55,-86.52\"/>\n",
"<text text-anchor=\"start\" x=\"375\" y=\"-63.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset</text>\n",
"<text text-anchor=\"start\" x=\"398.25\" y=\"-35.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- purchase_dataset.loader&#45;&gt;purchase_dataset -->\n",
"<g id=\"edge6\" class=\"edge\">\n",
"<title>purchase_dataset.loader&#45;&gt;purchase_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M335.65,-49.05C341.3,-49.38 346.97,-49.72 352.57,-50.05\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"352.24,-53.54 362.43,-50.64 352.66,-46.55 352.24,-53.54\"/>\n",
"</g>\n",
"<!-- saved_to_db -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>saved_to_db</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-260.89 1080.18,-264.45 1050.42,-264.45 1020.67,-264.45 996.5,-260.89 996.5,-256.5 996.5,-256.5 996.5,-184.95 996.5,-184.95 996.5,-180.56 1020.67,-177 1050.42,-177 1080.18,-177 1104.35,-180.56 1104.35,-184.95 1104.35,-184.95 1104.35,-256.5 1104.35,-256.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-252.11 1080.18,-248.55 1050.42,-248.55 1020.67,-248.55 996.5,-252.11 996.5,-256.5\"/>\n",
"<text text-anchor=\"start\" x=\"1009.17\" y=\"-229.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_to_db</text>\n",
"<text text-anchor=\"start\" x=\"1007.3\" y=\"-201.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_to_db()</text>\n",
"</g>\n",
"<!-- user_dataset.loader -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>user_dataset.loader</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-188.89 286.04,-192.45 244.9,-192.45 203.76,-192.45 170.35,-188.89 170.35,-184.5 170.35,-184.5 170.35,-112.95 170.35,-112.95 170.35,-108.56 203.76,-105 244.9,-105 286.04,-105 319.45,-108.56 319.45,-112.95 319.45,-112.95 319.45,-184.5 319.45,-184.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-180.11 286.04,-176.55 244.9,-176.55 203.76,-176.55 170.35,-180.11 170.35,-184.5\"/>\n",
"<text text-anchor=\"start\" x=\"181.15\" y=\"-157.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset.loader</text>\n",
"<text text-anchor=\"start\" x=\"201.4\" y=\"-129.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">user_dataset()</text>\n",
"</g>\n",
"<!-- user_dataset -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>user_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M472.8,-178.52C472.8,-178.52 391.95,-178.52 391.95,-178.52 385.95,-178.52 379.95,-172.52 379.95,-166.52 379.95,-166.52 379.95,-126.92 379.95,-126.92 379.95,-120.92 385.95,-114.92 391.95,-114.92 391.95,-114.92 472.8,-114.92 472.8,-114.92 478.8,-114.92 484.8,-120.92 484.8,-126.92 484.8,-126.92 484.8,-166.52 484.8,-166.52 484.8,-172.52 478.8,-178.52 472.8,-178.52\"/>\n",
"<text text-anchor=\"start\" x=\"390.75\" y=\"-155.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset</text>\n",
"<text text-anchor=\"start\" x=\"398.25\" y=\"-127.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- user_dataset.loader&#45;&gt;user_dataset -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>user_dataset.loader&#45;&gt;user_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M319.74,-147.93C335.83,-147.76 352.72,-147.57 368.35,-147.4\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"368.01,-150.91 377.97,-147.3 367.93,-143.91 368.01,-150.91\"/>\n",
"</g>\n",
"<!-- transformed_user_dataset -->\n",
"<g id=\"node9\" class=\"node\">\n",
"<title>transformed_user_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M725.15,-177.52C725.15,-177.52 557.3,-177.52 557.3,-177.52 551.3,-177.52 545.3,-171.52 545.3,-165.52 545.3,-165.52 545.3,-125.92 545.3,-125.92 545.3,-119.92 551.3,-113.92 557.3,-113.92 557.3,-113.92 725.15,-113.92 725.15,-113.92 731.15,-113.92 737.15,-119.92 737.15,-125.92 737.15,-125.92 737.15,-165.52 737.15,-165.52 737.15,-171.52 731.15,-177.52 725.15,-177.52\"/>\n",
"<text text-anchor=\"start\" x=\"556.1\" y=\"-154.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_user_dataset</text>\n",
"<text text-anchor=\"start\" x=\"607.1\" y=\"-126.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- user_dataset&#45;&gt;transformed_user_dataset -->\n",
"<g id=\"edge11\" class=\"edge\">\n",
"<title>user_dataset&#45;&gt;transformed_user_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M485.04,-146.48C499.96,-146.4 516.82,-146.32 533.78,-146.24\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"533.43,-149.74 543.41,-146.19 533.4,-142.74 533.43,-149.74\"/>\n",
"</g>\n",
"<!-- transformed_purchase_dataset -->\n",
"<g id=\"node6\" class=\"node\">\n",
"<title>transformed_purchase_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M740.9,-92.52C740.9,-92.52 541.55,-92.52 541.55,-92.52 535.55,-92.52 529.55,-86.52 529.55,-80.52 529.55,-80.52 529.55,-40.92 529.55,-40.92 529.55,-34.92 535.55,-28.93 541.55,-28.93 541.55,-28.93 740.9,-28.93 740.9,-28.93 746.9,-28.93 752.9,-34.92 752.9,-40.92 752.9,-40.92 752.9,-80.53 752.9,-80.53 752.9,-86.52 746.9,-92.52 740.9,-92.52\"/>\n",
"<text text-anchor=\"start\" x=\"540.35\" y=\"-69.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_purchase_dataset</text>\n",
"<text text-anchor=\"start\" x=\"607.1\" y=\"-41.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- purchase_dataset&#45;&gt;transformed_purchase_dataset -->\n",
"<g id=\"edge7\" class=\"edge\">\n",
"<title>purchase_dataset&#45;&gt;transformed_purchase_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M500.82,-56.68C506.42,-56.84 512.19,-57.01 518.04,-57.18\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"517.75,-60.67 527.85,-57.47 517.95,-53.68 517.75,-60.67\"/>\n",
"</g>\n",
"<!-- joined_dataset -->\n",
"<g id=\"node10\" class=\"node\">\n",
"<title>joined_dataset</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M920.75,-133.52C920.75,-133.52 828.65,-133.52 828.65,-133.52 822.65,-133.52 816.65,-127.52 816.65,-121.52 816.65,-121.52 816.65,-81.92 816.65,-81.92 816.65,-75.92 822.65,-69.92 828.65,-69.92 828.65,-69.92 920.75,-69.92 920.75,-69.92 926.75,-69.92 932.75,-75.92 932.75,-81.92 932.75,-81.92 932.75,-121.52 932.75,-121.52 932.75,-127.52 926.75,-133.52 920.75,-133.52\"/>\n",
"<text text-anchor=\"start\" x=\"827.45\" y=\"-110.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">joined_dataset</text>\n",
"<text text-anchor=\"start\" x=\"840.57\" y=\"-82.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n",
"</g>\n",
"<!-- transformed_purchase_dataset&#45;&gt;joined_dataset -->\n",
"<g id=\"edge13\" class=\"edge\">\n",
"<title>transformed_purchase_dataset&#45;&gt;joined_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M753.22,-80.38C770.94,-83.52 788.81,-86.69 805.17,-89.59\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"804.28,-92.98 814.74,-91.28 805.5,-86.09 804.28,-92.98\"/>\n",
"</g>\n",
"<!-- saved_file -->\n",
"<g id=\"node7\" class=\"node\">\n",
"<title>saved_file</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-140.89 1204.23,-144.45 1179.02,-144.45 1153.82,-144.45 1133.35,-140.89 1133.35,-136.5 1133.35,-136.5 1133.35,-64.95 1133.35,-64.95 1133.35,-60.56 1153.82,-57 1179.02,-57 1204.23,-57 1224.7,-60.56 1224.7,-64.95 1224.7,-64.95 1224.7,-136.5 1224.7,-136.5\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-132.11 1204.23,-128.55 1179.02,-128.55 1153.82,-128.55 1133.35,-132.11 1133.35,-136.5\"/>\n",
"<text text-anchor=\"start\" x=\"1146.4\" y=\"-109.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_file</text>\n",
"<text text-anchor=\"start\" x=\"1144.15\" y=\"-81.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_file()</text>\n",
"</g>\n",
"<!-- fit_model -->\n",
"<g id=\"node8\" class=\"node\">\n",
"<title>fit_model</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1087.85,-96.52C1087.85,-96.52 1013,-96.52 1013,-96.52 1007,-96.52 1001,-90.52 1001,-84.52 1001,-84.52 1001,-44.92 1001,-44.92 1001,-38.92 1007,-32.92 1013,-32.92 1013,-32.92 1087.85,-32.92 1087.85,-32.92 1093.85,-32.92 1099.85,-38.92 1099.85,-44.92 1099.85,-44.92 1099.85,-84.53 1099.85,-84.53 1099.85,-90.52 1093.85,-96.52 1087.85,-96.52\"/>\n",
"<text text-anchor=\"start\" x=\"1020.05\" y=\"-73.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n",
"<text text-anchor=\"start\" x=\"1011.8\" y=\"-45.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ModelObject</text>\n",
"</g>\n",
"<!-- fit_model&#45;&gt;saved_file -->\n",
"<g id=\"edge8\" class=\"edge\">\n",
"<title>fit_model&#45;&gt;saved_file</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1100.01,-78.54C1107.23,-80.59 1114.73,-82.73 1122.09,-84.82\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1121.07,-88.17 1131.65,-87.54 1122.98,-81.43 1121.07,-88.17\"/>\n",
"</g>\n",
"<!-- transformed_user_dataset&#45;&gt;joined_dataset -->\n",
"<g id=\"edge12\" class=\"edge\">\n",
"<title>transformed_user_dataset&#45;&gt;joined_dataset</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M737.38,-127.64C760.1,-123.32 783.92,-118.79 805.18,-114.75\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"805.71,-118.21 814.88,-112.91 804.4,-111.33 805.71,-118.21\"/>\n",
"</g>\n",
"<!-- joined_dataset&#45;&gt;saved_to_db -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>joined_dataset&#45;&gt;saved_to_db</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M922.71,-133.93C942.5,-147.48 965.8,-163.44 986.99,-177.96\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"984.76,-180.68 994.99,-183.44 988.72,-174.9 984.76,-180.68\"/>\n",
"</g>\n",
"<!-- joined_dataset&#45;&gt;fit_model -->\n",
"<g id=\"edge10\" class=\"edge\">\n",
"<title>joined_dataset&#45;&gt;fit_model</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M933.07,-89.51C951.15,-85.65 971.18,-81.39 989.44,-77.5\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"990.02,-80.95 999.07,-75.45 988.57,-74.11 990.02,-80.95\"/>\n",
"</g>\n",
"<!-- _purchase_dataset.loader_inputs -->\n",
"<g id=\"node11\" class=\"node\">\n",
"<title>_purchase_dataset.loader_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"125.6,-66.02 0,-66.02 0,-21.42 125.6,-21.42 125.6,-66.02\"/>\n",
"<text text-anchor=\"start\" x=\"14.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n",
"<text text-anchor=\"start\" x=\"74.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n",
"</g>\n",
"<!-- _purchase_dataset.loader_inputs&#45;&gt;purchase_dataset.loader -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>_purchase_dataset.loader_inputs&#45;&gt;purchase_dataset.loader</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M126.03,-43.72C131.61,-43.72 137.34,-43.72 143.14,-43.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"142.81,-47.23 152.81,-43.73 142.81,-40.23 142.81,-47.23\"/>\n",
"</g>\n",
"<!-- _saved_to_db_inputs -->\n",
"<g id=\"node12\" class=\"node\">\n",
"<title>_saved_to_db_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"967.5,-253.52 781.9,-253.52 781.9,-187.93 967.5,-187.93 967.5,-253.52\"/>\n",
"<text text-anchor=\"start\" x=\"826.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n",
"<text text-anchor=\"start\" x=\"916.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n",
"<text text-anchor=\"start\" x=\"796.32\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">joined_table_name</text>\n",
"<text text-anchor=\"start\" x=\"927.2\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _saved_to_db_inputs&#45;&gt;saved_to_db -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>_saved_to_db_inputs&#45;&gt;saved_to_db</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M967.9,-220.72C973.69,-220.72 979.44,-220.72 985.06,-220.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"984.86,-224.23 994.86,-220.73 984.86,-217.23 984.86,-224.23\"/>\n",
"</g>\n",
"<!-- _user_dataset.loader_inputs -->\n",
"<g id=\"node13\" class=\"node\">\n",
"<title>_user_dataset.loader_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"124.47,-171.02 1.12,-171.02 1.12,-126.42 124.47,-126.42 124.47,-171.02\"/>\n",
"<text text-anchor=\"start\" x=\"15.92\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_ds_path</text>\n",
"<text text-anchor=\"start\" x=\"94.67\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _user_dataset.loader_inputs&#45;&gt;user_dataset.loader -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>_user_dataset.loader_inputs&#45;&gt;user_dataset.loader</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M124.77,-148.72C135.64,-148.72 147.14,-148.72 158.54,-148.72\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"158.37,-152.23 168.37,-148.73 158.37,-145.23 158.37,-152.23\"/>\n",
"</g>\n",
"<!-- _saved_file_inputs -->\n",
"<g id=\"node14\" class=\"node\">\n",
"<title>_saved_file_inputs</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1101.22,-159.02 999.62,-159.02 999.62,-114.42 1101.22,-114.42 1101.22,-159.02\"/>\n",
"<text text-anchor=\"start\" x=\"1014.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_path</text>\n",
"<text text-anchor=\"start\" x=\"1071.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n",
"</g>\n",
"<!-- _saved_file_inputs&#45;&gt;saved_file -->\n",
"<g id=\"edge9\" class=\"edge\">\n",
"<title>_saved_file_inputs&#45;&gt;saved_file</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M1101.46,-122.5C1108.26,-120.56 1115.28,-118.57 1122.18,-116.61\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"1122.76,-120.08 1131.42,-113.98 1120.84,-113.35 1122.76,-120.08\"/>\n",
"</g>\n",
"<!-- input -->\n",
"<g id=\"node15\" class=\"node\">\n",
"<title>input</title>\n",
"<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.8,-226.02 35.8,-226.02 35.8,-189.43 89.8,-189.43 89.8,-226.02\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-201.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n",
"</g>\n",
"<!-- function -->\n",
"<g id=\"node16\" class=\"node\">\n",
"<title>function</title>\n",
"<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M85.22,-281.03C85.22,-281.03 40.37,-281.03 40.37,-281.03 34.37,-281.03 28.37,-275.03 28.37,-269.03 28.37,-269.03 28.37,-256.43 28.37,-256.43 28.37,-250.43 34.37,-244.43 40.37,-244.43 40.37,-244.43 85.22,-244.43 85.22,-244.43 91.22,-244.43 97.22,-250.43 97.22,-256.43 97.22,-256.43 97.22,-269.03 97.22,-269.03 97.22,-275.03 91.22,-281.03 85.22,-281.03\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-256.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n",
"</g>\n",
"<!-- materializer -->\n",
"<g id=\"node17\" class=\"node\">\n",
"<title>materializer</title>\n",
"<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-338.29 88.21,-339.94 62.8,-339.94 37.39,-339.94 16.75,-338.29 16.75,-336.26 16.75,-336.26 16.75,-303.19 16.75,-303.19 16.75,-301.16 37.39,-299.51 62.8,-299.51 88.21,-299.51 108.85,-301.16 108.85,-303.19 108.85,-303.19 108.85,-336.26 108.85,-336.26\"/>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-334.23 88.21,-332.59 62.8,-332.59 37.39,-332.59 16.75,-334.23 16.75,-336.26\"/>\n",
"<text text-anchor=\"middle\" x=\"62.8\" y=\"-313.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.graphs.Digraph at 0x157544e50>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from hamilton import driver\n",
"from hamilton.plugins import h_openlineage\n",
"\n",
"import pipeline\n",
"\n",
"# Lineage adapter wrapping `client`, which must already exist from an earlier cell.\n",
"ola = h_openlineage.OpenLineageAdapter(client, \"demo_namespace\", \"my_hamilton_job\")\n",
"\n",
"# Assemble the driver from the pipeline module with the adapter attached.\n",
"dr = (\n",
"    driver.Builder()\n",
"    .with_modules(pipeline)\n",
"    .with_adapters(ola)\n",
"    .build()\n",
")\n",
"\n",
"# Kept as the final expression so the rendered graph becomes the cell output.\n",
"dr.display_all_functions()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "ec295725",
"metadata": {
"ExecuteTime": {
"end_time": "2024-09-06T17:32:31.534973Z",
"start_time": "2024-09-06T17:32:31.475171Z"
}
},
"outputs": [],
"source": [
"# Create the inputs needed to run the DAG.\n",
"import sqlite3\n",
"\n",
"db_client = sqlite3.connect(\"purchase_data.db\")\n",
"\n",
"try:\n",
"    # Execute the DAG built in the previous cell (`dr` carries the lineage adapter),\n",
"    # requesting both saver nodes as outputs.\n",
"    result = dr.execute(\n",
"        [\"saved_file\", \"saved_to_db\"],\n",
"        inputs={\n",
"            \"db_client\": db_client,\n",
"            \"file_ds_path\": \"data.csv\",\n",
"            \"file_path\": \"model.pkl\",\n",
"            \"joined_table_name\": \"joined_data\",\n",
"        },\n",
"    )\n",
"finally:\n",
"    # Always close the DB, even when execution raises, so that re-running this\n",
"    # cell after a failure does not leave stale SQLite connections open.\n",
"    db_client.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b4ad693",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}