| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "id": "dc0e298d", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Execute this cell to install dependencies.\n", |
| "# `openlineage-python` provides the `openlineage.client` package that a later cell imports;\n", |
| "# without it the notebook fails on a fresh kernel (e.g. when opened in Colab).\n", |
| "%pip install sf-hamilton[visualization] openlineage-python" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "f937d835", |
| "metadata": {}, |
| "source": [ |
| "# OpenLineage example pipeline [Open in Colab](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/openlineage/notebook.ipynb) [View on GitHub](https://github.com/dagworks-inc/hamilton/blob/main/examples/openlineage/notebook.ipynb)\n", |
| "\n", |
| "\n", |
| "This is a simple example of a pipeline that reads data from a file and a database, joins them, fits a model, and saves the model to a file and the joined data to a database. The pipeline does not import OpenLineage and doesn't need to know about it. The salient point is that metadata is exposed by the data loading and data\n", |
| "saving functions. This metadata is what is used to populate OpenLineage events." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "id": "ab02bddb", |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "%load_ext hamilton.plugins.jupyter_magic" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "id": "2ccc7699", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-09-06T17:30:38.628295Z", |
| "start_time": "2024-09-06T17:30:38.182382Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n", |
| " -->\n", |
| "<!-- Pages: 1 -->\n", |
| "<svg width=\"1233pt\" height=\"387pt\"\n", |
| " viewBox=\"0.00 0.00 1232.70 386.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 382.73)\">\n", |
| "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-382.73 1228.7,-382.73 1228.7,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.75,-181.72 8.75,-370.73 116.85,-370.73 116.85,-181.72 8.75,-181.72\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-353.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset.loader -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>purchase_dataset.loader</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-83.89 294.73,-87.45 244.9,-87.45 195.07,-87.45 154.6,-83.89 154.6,-79.5 154.6,-79.5 154.6,-7.95 154.6,-7.95 154.6,-3.56 195.07,0 244.9,0 294.73,0 335.2,-3.56 335.2,-7.95 335.2,-7.95 335.2,-79.5 335.2,-79.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-75.11 294.73,-71.55 244.9,-71.55 195.07,-71.55 154.6,-75.11 154.6,-79.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"165.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset.loader</text>\n", |
| "<text text-anchor=\"start\" x=\"186.77\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">purchase_dataset()</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>purchase_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M488.55,-86.52C488.55,-86.52 376.2,-86.52 376.2,-86.52 370.2,-86.52 364.2,-80.52 364.2,-74.52 364.2,-74.52 364.2,-34.92 364.2,-34.92 364.2,-28.92 370.2,-22.93 376.2,-22.93 376.2,-22.93 488.55,-22.93 488.55,-22.93 494.55,-22.93 500.55,-28.93 500.55,-34.92 500.55,-34.92 500.55,-74.53 500.55,-74.53 500.55,-80.52 494.55,-86.52 488.55,-86.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"375\" y=\"-63.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"398.25\" y=\"-35.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset.loader->purchase_dataset -->\n", |
| "<g id=\"edge6\" class=\"edge\">\n", |
| "<title>purchase_dataset.loader->purchase_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M335.65,-49.05C341.3,-49.38 346.97,-49.72 352.57,-50.05\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"352.24,-53.54 362.43,-50.64 352.66,-46.55 352.24,-53.54\"/>\n", |
| "</g>\n", |
| "<!-- saved_to_db -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>saved_to_db</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-260.89 1080.18,-264.45 1050.42,-264.45 1020.67,-264.45 996.5,-260.89 996.5,-256.5 996.5,-256.5 996.5,-184.95 996.5,-184.95 996.5,-180.56 1020.67,-177 1050.42,-177 1080.18,-177 1104.35,-180.56 1104.35,-184.95 1104.35,-184.95 1104.35,-256.5 1104.35,-256.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-252.11 1080.18,-248.55 1050.42,-248.55 1020.67,-248.55 996.5,-252.11 996.5,-256.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"1009.17\" y=\"-229.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_to_db</text>\n", |
| "<text text-anchor=\"start\" x=\"1007.3\" y=\"-201.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_to_db()</text>\n", |
| "</g>\n", |
| "<!-- user_dataset.loader -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>user_dataset.loader</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-188.89 286.04,-192.45 244.9,-192.45 203.76,-192.45 170.35,-188.89 170.35,-184.5 170.35,-184.5 170.35,-112.95 170.35,-112.95 170.35,-108.56 203.76,-105 244.9,-105 286.04,-105 319.45,-108.56 319.45,-112.95 319.45,-112.95 319.45,-184.5 319.45,-184.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-180.11 286.04,-176.55 244.9,-176.55 203.76,-176.55 170.35,-180.11 170.35,-184.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"181.15\" y=\"-157.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset.loader</text>\n", |
| "<text text-anchor=\"start\" x=\"201.4\" y=\"-129.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">user_dataset()</text>\n", |
| "</g>\n", |
| "<!-- user_dataset -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>user_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M472.8,-178.52C472.8,-178.52 391.95,-178.52 391.95,-178.52 385.95,-178.52 379.95,-172.52 379.95,-166.52 379.95,-166.52 379.95,-126.92 379.95,-126.92 379.95,-120.92 385.95,-114.92 391.95,-114.92 391.95,-114.92 472.8,-114.92 472.8,-114.92 478.8,-114.92 484.8,-120.92 484.8,-126.92 484.8,-126.92 484.8,-166.52 484.8,-166.52 484.8,-172.52 478.8,-178.52 472.8,-178.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"390.75\" y=\"-155.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"398.25\" y=\"-127.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- user_dataset.loader->user_dataset -->\n", |
| "<g id=\"edge5\" class=\"edge\">\n", |
| "<title>user_dataset.loader->user_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M319.74,-147.93C335.83,-147.76 352.72,-147.57 368.35,-147.4\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"368.01,-150.91 377.97,-147.3 367.93,-143.91 368.01,-150.91\"/>\n", |
| "</g>\n", |
| "<!-- transformed_user_dataset -->\n", |
| "<g id=\"node9\" class=\"node\">\n", |
| "<title>transformed_user_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M725.15,-177.52C725.15,-177.52 557.3,-177.52 557.3,-177.52 551.3,-177.52 545.3,-171.52 545.3,-165.52 545.3,-165.52 545.3,-125.92 545.3,-125.92 545.3,-119.92 551.3,-113.92 557.3,-113.92 557.3,-113.92 725.15,-113.92 725.15,-113.92 731.15,-113.92 737.15,-119.92 737.15,-125.92 737.15,-125.92 737.15,-165.52 737.15,-165.52 737.15,-171.52 731.15,-177.52 725.15,-177.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"556.1\" y=\"-154.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_user_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"607.1\" y=\"-126.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- user_dataset->transformed_user_dataset -->\n", |
| "<g id=\"edge11\" class=\"edge\">\n", |
| "<title>user_dataset->transformed_user_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M485.04,-146.48C499.96,-146.4 516.82,-146.32 533.78,-146.24\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"533.43,-149.74 543.41,-146.19 533.4,-142.74 533.43,-149.74\"/>\n", |
| "</g>\n", |
| "<!-- transformed_purchase_dataset -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>transformed_purchase_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M740.9,-92.52C740.9,-92.52 541.55,-92.52 541.55,-92.52 535.55,-92.52 529.55,-86.52 529.55,-80.52 529.55,-80.52 529.55,-40.92 529.55,-40.92 529.55,-34.92 535.55,-28.93 541.55,-28.93 541.55,-28.93 740.9,-28.93 740.9,-28.93 746.9,-28.93 752.9,-34.92 752.9,-40.92 752.9,-40.92 752.9,-80.53 752.9,-80.53 752.9,-86.52 746.9,-92.52 740.9,-92.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"540.35\" y=\"-69.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_purchase_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"607.1\" y=\"-41.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset->transformed_purchase_dataset -->\n", |
| "<g id=\"edge7\" class=\"edge\">\n", |
| "<title>purchase_dataset->transformed_purchase_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M500.82,-56.68C506.42,-56.84 512.19,-57.01 518.04,-57.18\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"517.75,-60.67 527.85,-57.47 517.95,-53.68 517.75,-60.67\"/>\n", |
| "</g>\n", |
| "<!-- joined_dataset -->\n", |
| "<g id=\"node10\" class=\"node\">\n", |
| "<title>joined_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M920.75,-133.52C920.75,-133.52 828.65,-133.52 828.65,-133.52 822.65,-133.52 816.65,-127.52 816.65,-121.52 816.65,-121.52 816.65,-81.92 816.65,-81.92 816.65,-75.92 822.65,-69.92 828.65,-69.92 828.65,-69.92 920.75,-69.92 920.75,-69.92 926.75,-69.92 932.75,-75.92 932.75,-81.92 932.75,-81.92 932.75,-121.52 932.75,-121.52 932.75,-127.52 926.75,-133.52 920.75,-133.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"827.45\" y=\"-110.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">joined_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"840.57\" y=\"-82.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- transformed_purchase_dataset->joined_dataset -->\n", |
| "<g id=\"edge13\" class=\"edge\">\n", |
| "<title>transformed_purchase_dataset->joined_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M753.22,-80.38C770.94,-83.52 788.81,-86.69 805.17,-89.59\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"804.28,-92.98 814.74,-91.28 805.5,-86.09 804.28,-92.98\"/>\n", |
| "</g>\n", |
| "<!-- saved_file -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>saved_file</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-140.89 1204.23,-144.45 1179.02,-144.45 1153.82,-144.45 1133.35,-140.89 1133.35,-136.5 1133.35,-136.5 1133.35,-64.95 1133.35,-64.95 1133.35,-60.56 1153.82,-57 1179.02,-57 1204.23,-57 1224.7,-60.56 1224.7,-64.95 1224.7,-64.95 1224.7,-136.5 1224.7,-136.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-132.11 1204.23,-128.55 1179.02,-128.55 1153.82,-128.55 1133.35,-132.11 1133.35,-136.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"1146.4\" y=\"-109.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_file</text>\n", |
| "<text text-anchor=\"start\" x=\"1144.15\" y=\"-81.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_file()</text>\n", |
| "</g>\n", |
| "<!-- fit_model -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>fit_model</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1087.85,-96.52C1087.85,-96.52 1013,-96.52 1013,-96.52 1007,-96.52 1001,-90.52 1001,-84.52 1001,-84.52 1001,-44.92 1001,-44.92 1001,-38.92 1007,-32.92 1013,-32.92 1013,-32.92 1087.85,-32.92 1087.85,-32.92 1093.85,-32.92 1099.85,-38.92 1099.85,-44.92 1099.85,-44.92 1099.85,-84.53 1099.85,-84.53 1099.85,-90.52 1093.85,-96.52 1087.85,-96.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"1020.05\" y=\"-73.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n", |
| "<text text-anchor=\"start\" x=\"1011.8\" y=\"-45.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ModelObject</text>\n", |
| "</g>\n", |
| "<!-- fit_model->saved_file -->\n", |
| "<g id=\"edge8\" class=\"edge\">\n", |
| "<title>fit_model->saved_file</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1100.01,-78.54C1107.23,-80.59 1114.73,-82.73 1122.09,-84.82\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1121.07,-88.17 1131.65,-87.54 1122.98,-81.43 1121.07,-88.17\"/>\n", |
| "</g>\n", |
| "<!-- transformed_user_dataset->joined_dataset -->\n", |
| "<g id=\"edge12\" class=\"edge\">\n", |
| "<title>transformed_user_dataset->joined_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M737.38,-127.64C760.1,-123.32 783.92,-118.79 805.18,-114.75\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"805.71,-118.21 814.88,-112.91 804.4,-111.33 805.71,-118.21\"/>\n", |
| "</g>\n", |
| "<!-- joined_dataset->saved_to_db -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>joined_dataset->saved_to_db</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M922.71,-133.93C942.5,-147.48 965.8,-163.44 986.99,-177.96\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"984.76,-180.68 994.99,-183.44 988.72,-174.9 984.76,-180.68\"/>\n", |
| "</g>\n", |
| "<!-- joined_dataset->fit_model -->\n", |
| "<g id=\"edge10\" class=\"edge\">\n", |
| "<title>joined_dataset->fit_model</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M933.07,-89.51C951.15,-85.65 971.18,-81.39 989.44,-77.5\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"990.02,-80.95 999.07,-75.45 988.57,-74.11 990.02,-80.95\"/>\n", |
| "</g>\n", |
| "<!-- _purchase_dataset.loader_inputs -->\n", |
| "<g id=\"node11\" class=\"node\">\n", |
| "<title>_purchase_dataset.loader_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"125.6,-66.02 0,-66.02 0,-21.42 125.6,-21.42 125.6,-66.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"14.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n", |
| "<text text-anchor=\"start\" x=\"74.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n", |
| "</g>\n", |
| "<!-- _purchase_dataset.loader_inputs->purchase_dataset.loader -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>_purchase_dataset.loader_inputs->purchase_dataset.loader</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M126.03,-43.72C131.61,-43.72 137.34,-43.72 143.14,-43.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"142.81,-47.23 152.81,-43.73 142.81,-40.23 142.81,-47.23\"/>\n", |
| "</g>\n", |
| "<!-- _saved_to_db_inputs -->\n", |
| "<g id=\"node12\" class=\"node\">\n", |
| "<title>_saved_to_db_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"967.5,-253.52 781.9,-253.52 781.9,-187.93 967.5,-187.93 967.5,-253.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"826.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n", |
| "<text text-anchor=\"start\" x=\"916.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n", |
| "<text text-anchor=\"start\" x=\"796.32\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">joined_table_name</text>\n", |
| "<text text-anchor=\"start\" x=\"927.2\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _saved_to_db_inputs->saved_to_db -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>_saved_to_db_inputs->saved_to_db</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M967.9,-220.72C973.69,-220.72 979.44,-220.72 985.06,-220.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"984.86,-224.23 994.86,-220.73 984.86,-217.23 984.86,-224.23\"/>\n", |
| "</g>\n", |
| "<!-- _user_dataset.loader_inputs -->\n", |
| "<g id=\"node13\" class=\"node\">\n", |
| "<title>_user_dataset.loader_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"124.47,-171.02 1.12,-171.02 1.12,-126.42 124.47,-126.42 124.47,-171.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"15.92\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_ds_path</text>\n", |
| "<text text-anchor=\"start\" x=\"94.67\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _user_dataset.loader_inputs->user_dataset.loader -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>_user_dataset.loader_inputs->user_dataset.loader</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.77,-148.72C135.64,-148.72 147.14,-148.72 158.54,-148.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"158.37,-152.23 168.37,-148.73 158.37,-145.23 158.37,-152.23\"/>\n", |
| "</g>\n", |
| "<!-- _saved_file_inputs -->\n", |
| "<g id=\"node14\" class=\"node\">\n", |
| "<title>_saved_file_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1101.22,-159.02 999.62,-159.02 999.62,-114.42 1101.22,-114.42 1101.22,-159.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"1014.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_path</text>\n", |
| "<text text-anchor=\"start\" x=\"1071.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _saved_file_inputs->saved_file -->\n", |
| "<g id=\"edge9\" class=\"edge\">\n", |
| "<title>_saved_file_inputs->saved_file</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1101.46,-122.5C1108.26,-120.56 1115.28,-118.57 1122.18,-116.61\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1122.76,-120.08 1131.42,-113.98 1120.84,-113.35 1122.76,-120.08\"/>\n", |
| "</g>\n", |
| "<!-- input -->\n", |
| "<g id=\"node15\" class=\"node\">\n", |
| "<title>input</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.8,-226.02 35.8,-226.02 35.8,-189.43 89.8,-189.43 89.8,-226.02\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-201.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node16\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M85.22,-281.03C85.22,-281.03 40.37,-281.03 40.37,-281.03 34.37,-281.03 28.37,-275.03 28.37,-269.03 28.37,-269.03 28.37,-256.43 28.37,-256.43 28.37,-250.43 34.37,-244.43 40.37,-244.43 40.37,-244.43 85.22,-244.43 85.22,-244.43 91.22,-244.43 97.22,-250.43 97.22,-256.43 97.22,-256.43 97.22,-269.03 97.22,-269.03 97.22,-275.03 91.22,-281.03 85.22,-281.03\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-256.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "<!-- materializer -->\n", |
| "<g id=\"node17\" class=\"node\">\n", |
| "<title>materializer</title>\n", |
| "<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-338.29 88.21,-339.94 62.8,-339.94 37.39,-339.94 16.75,-338.29 16.75,-336.26 16.75,-336.26 16.75,-303.19 16.75,-303.19 16.75,-301.16 37.39,-299.51 62.8,-299.51 88.21,-299.51 108.85,-301.16 108.85,-303.19 108.85,-303.19 108.85,-336.26 108.85,-336.26\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-334.23 88.21,-332.59 62.8,-332.59 37.39,-332.59 16.75,-334.23 16.75,-336.26\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-313.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x1575446d0>" |
| ] |
| }, |
| "metadata": {}, |
| "output_type": "display_data" |
| } |
| ], |
| "source": [ |
| "%%cell_to_module pipeline --display\n", |
| "\n", |
| "import pickle\n", |
| "from typing import Tuple\n", |
| "\n", |
| "import pandas as pd\n", |
| "\n", |
| "from hamilton.function_modifiers import dataloader, datasaver\n", |
| "from hamilton.io import utils\n", |
| "\n", |
| "@dataloader()\n", |
| "def user_dataset(file_ds_path: str) -> Tuple[pd.DataFrame, dict]:\n", |
| "    \"\"\"Read the user dataset from a CSV file.\n", |
| "\n", |
| "    :param file_ds_path: path of the CSV file passed to ``pd.read_csv``.\n", |
| "    :return: the loaded dataframe together with the metadata dict produced by\n", |
| "        ``utils.get_file_and_dataframe_metadata`` for that path and dataframe.\n", |
| "    \"\"\"\n", |
| "    users = pd.read_csv(file_ds_path)\n", |
| "    loader_metadata = utils.get_file_and_dataframe_metadata(file_ds_path, users)\n", |
| "    return users, loader_metadata\n", |
| "\n", |
| "\n", |
| "@dataloader()\n", |
| "def purchase_dataset(db_client: object) -> Tuple[pd.DataFrame, dict]:\n", |
| "    \"\"\"Read every row of the ``purchase_data`` table through ``db_client``.\n", |
| "\n", |
| "    :param db_client: connection object handed to ``pd.read_sql`` as ``con``.\n", |
| "    :return: the loaded dataframe and a metadata dict holding a ``sql_metadata``\n", |
| "        entry (query, table name, database) merged with the output of\n", |
| "        ``utils.get_dataframe_metadata``.\n", |
| "    \"\"\"\n", |
| "    query = \"SELECT * FROM purchase_data\"\n", |
| "    purchases = pd.read_sql(query, con=db_client)\n", |
| "    sql_info = {\"query\": query, \"table_name\": \"purchase_data\", \"database\": \"sqlite\"}\n", |
| "    # Dataframe metadata is unpacked last so it wins on key clashes, as dict.update would.\n", |
| "    metadata = {\"sql_metadata\": sql_info, **utils.get_dataframe_metadata(purchases)}\n", |
| "    return purchases, metadata\n", |
| "\n", |
| "\n", |
| "def transformed_user_dataset(user_dataset: pd.DataFrame) -> pd.DataFrame:\n", |
| "    \"\"\"Pass-through transform: returns ``user_dataset`` unchanged (same object).\"\"\"\n", |
| "    return user_dataset\n", |
| "\n", |
| "\n", |
| "def transformed_purchase_dataset(purchase_dataset: pd.DataFrame) -> pd.DataFrame:\n", |
| "    \"\"\"Pass-through transform: returns ``purchase_dataset`` unchanged (same object).\"\"\"\n", |
| "    return purchase_dataset\n", |
| "\n", |
| "\n", |
| "def joined_dataset(\n", |
| "    transformed_user_dataset: pd.DataFrame, transformed_purchase_dataset: pd.DataFrame\n", |
| ") -> pd.DataFrame:\n", |
| "    \"\"\"Join users to their purchases.\n", |
| "\n", |
| "    Rows are matched on the users' ``id`` column and the purchases' ``user_id``\n", |
| "    column (pandas' default inner join). The ``id_x`` and ``id_y`` columns are then\n", |
| "    removed from the result; presumably these are the suffixed copies pandas creates\n", |
| "    when both inputs carry their own ``id`` column. A ``KeyError`` is raised if\n", |
| "    either column is absent.\n", |
| "    \"\"\"\n", |
| "    merged = transformed_user_dataset.merge(\n", |
| "        transformed_purchase_dataset, left_on=\"id\", right_on=\"user_id\"\n", |
| "    )\n", |
| "    return merged.drop(columns=[\"id_x\", \"id_y\"])\n", |
| "\n", |
| "\n", |
| "class ModelObject:\n", |
| "    \"\"\"Minimal stand-in for a trained model; it holds no state.\"\"\"\n", |
| "\n", |
| "    def __init__(self):\n", |
| "        \"\"\"Nothing to initialise.\"\"\"\n", |
| "\n", |
| "    def predict(self, data):\n", |
| "        \"\"\"Return ``data + 1``.\"\"\"\n", |
| "        return data + 1\n", |
| "\n", |
| "\n", |
| "def fit_model(joined_dataset: pd.DataFrame) -> ModelObject:\n", |
| "    \"\"\"Stand-in for model training.\n", |
| "\n", |
| "    ``joined_dataset`` is accepted but never read; a fresh ``ModelObject`` is returned.\n", |
| "    \"\"\"\n", |
| "    # Placeholder: real fitting logic would go here.\n", |
| "    model = ModelObject()\n", |
| "    return model\n", |
| "\n", |
| "\n", |
| "@datasaver()\n", |
| "def saved_file(fit_model: ModelObject, file_path: str) -> dict:\n", |
| "    \"\"\"Pickle the model to ``file_path``.\n", |
| "\n", |
| "    :param fit_model: object to serialise with ``pickle``.\n", |
| "    :param file_path: destination path, opened in binary write mode.\n", |
| "    :return: the result of ``utils.get_file_metadata(file_path)``, computed after\n", |
| "        the file has been closed.\n", |
| "    \"\"\"\n", |
| "    with open(file_path, \"wb\") as model_file:\n", |
| "        model_file.write(pickle.dumps(fit_model))\n", |
| "    return utils.get_file_metadata(file_path)\n", |
| "\n", |
| "\n", |
| "@datasaver()\n", |
| "def saved_to_db(joined_dataset: pd.DataFrame, db_client: object, joined_table_name: str) -> dict:\n", |
| "    \"\"\"Write the joined data to a database table, replacing the table if it exists.\n", |
| "\n", |
| "    :param joined_dataset: dataframe to persist (its index is not written).\n", |
| "    :param db_client: connection object handed to ``DataFrame.to_sql`` as ``con``.\n", |
| "    :param joined_table_name: name of the destination table.\n", |
| "    :return: ``utils.get_sql_metadata`` output merged with\n", |
| "        ``utils.get_dataframe_metadata`` output; the latter wins on key clashes.\n", |
| "    \"\"\"\n", |
| "    joined_dataset.to_sql(joined_table_name, con=db_client, index=False, if_exists=\"replace\")\n", |
| "    return {\n", |
| "        **utils.get_sql_metadata(joined_table_name, joined_dataset),\n", |
| "        **utils.get_dataframe_metadata(joined_dataset),\n", |
| "    }" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "50a7f746", |
| "metadata": {}, |
| "source": [ |
| "# Create OpenLineage client" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "id": "f7dcecc8", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-09-06T17:31:52.088986Z", |
| "start_time": "2024-09-06T17:31:51.984490Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "from openlineage.client import OpenLineageClient\n", |
| "from openlineage.client.transport.file import FileConfig, FileTransport\n", |
| "\n", |
| "# Without a running OpenLineage server, events can be written to a local file instead:\n", |
| "# `pipeline.json`, opened with append=True.\n", |
| "file_config = FileConfig(log_file_path=\"pipeline.json\", append=True)\n", |
| "file_transport = FileTransport(file_config)\n", |
| "\n", |
| "# With a running OpenLineage server (e.g. Marquez), use this client instead:\n", |
| "# client = OpenLineageClient(url=\"http://localhost:9000\")\n", |
| "client = OpenLineageClient(transport=file_transport)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "id": "4a49f34a", |
| "metadata": {}, |
| "source": [ |
| "# Create Hamilton DAG with OpenLineage Adapter" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "id": "6db87906", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-09-06T17:32:31.294469Z", |
| "start_time": "2024-09-06T17:32:30.852466Z" |
| } |
| }, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 12.0.0 (20240704.0754)\n", |
| " -->\n", |
| "<!-- Pages: 1 -->\n", |
| "<svg width=\"1233pt\" height=\"387pt\"\n", |
| " viewBox=\"0.00 0.00 1232.70 386.73\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 382.73)\">\n", |
| "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-382.73 1228.7,-382.73 1228.7,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8.75,-181.72 8.75,-370.73 116.85,-370.73 116.85,-181.72 8.75,-181.72\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-353.43\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset.loader -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>purchase_dataset.loader</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-83.89 294.73,-87.45 244.9,-87.45 195.07,-87.45 154.6,-83.89 154.6,-79.5 154.6,-79.5 154.6,-7.95 154.6,-7.95 154.6,-3.56 195.07,0 244.9,0 294.73,0 335.2,-3.56 335.2,-7.95 335.2,-7.95 335.2,-79.5 335.2,-79.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M335.2,-79.5C335.2,-75.11 294.73,-71.55 244.9,-71.55 195.07,-71.55 154.6,-75.11 154.6,-79.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"165.4\" y=\"-52.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset.loader</text>\n", |
| "<text text-anchor=\"start\" x=\"186.77\" y=\"-24.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">purchase_dataset()</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>purchase_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M488.55,-86.52C488.55,-86.52 376.2,-86.52 376.2,-86.52 370.2,-86.52 364.2,-80.52 364.2,-74.52 364.2,-74.52 364.2,-34.92 364.2,-34.92 364.2,-28.92 370.2,-22.93 376.2,-22.93 376.2,-22.93 488.55,-22.93 488.55,-22.93 494.55,-22.93 500.55,-28.93 500.55,-34.92 500.55,-34.92 500.55,-74.53 500.55,-74.53 500.55,-80.52 494.55,-86.52 488.55,-86.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"375\" y=\"-63.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">purchase_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"398.25\" y=\"-35.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset.loader->purchase_dataset -->\n", |
| "<g id=\"edge6\" class=\"edge\">\n", |
| "<title>purchase_dataset.loader->purchase_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M335.65,-49.05C341.3,-49.38 346.97,-49.72 352.57,-50.05\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"352.24,-53.54 362.43,-50.64 352.66,-46.55 352.24,-53.54\"/>\n", |
| "</g>\n", |
| "<!-- saved_to_db -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>saved_to_db</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-260.89 1080.18,-264.45 1050.42,-264.45 1020.67,-264.45 996.5,-260.89 996.5,-256.5 996.5,-256.5 996.5,-184.95 996.5,-184.95 996.5,-180.56 1020.67,-177 1050.42,-177 1080.18,-177 1104.35,-180.56 1104.35,-184.95 1104.35,-184.95 1104.35,-256.5 1104.35,-256.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1104.35,-256.5C1104.35,-252.11 1080.18,-248.55 1050.42,-248.55 1020.67,-248.55 996.5,-252.11 996.5,-256.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"1009.17\" y=\"-229.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_to_db</text>\n", |
| "<text text-anchor=\"start\" x=\"1007.3\" y=\"-201.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_to_db()</text>\n", |
| "</g>\n", |
| "<!-- user_dataset.loader -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>user_dataset.loader</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-188.89 286.04,-192.45 244.9,-192.45 203.76,-192.45 170.35,-188.89 170.35,-184.5 170.35,-184.5 170.35,-112.95 170.35,-112.95 170.35,-108.56 203.76,-105 244.9,-105 286.04,-105 319.45,-108.56 319.45,-112.95 319.45,-112.95 319.45,-184.5 319.45,-184.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M319.45,-184.5C319.45,-180.11 286.04,-176.55 244.9,-176.55 203.76,-176.55 170.35,-180.11 170.35,-184.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"181.15\" y=\"-157.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset.loader</text>\n", |
| "<text text-anchor=\"start\" x=\"201.4\" y=\"-129.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">user_dataset()</text>\n", |
| "</g>\n", |
| "<!-- user_dataset -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>user_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M472.8,-178.52C472.8,-178.52 391.95,-178.52 391.95,-178.52 385.95,-178.52 379.95,-172.52 379.95,-166.52 379.95,-166.52 379.95,-126.92 379.95,-126.92 379.95,-120.92 385.95,-114.92 391.95,-114.92 391.95,-114.92 472.8,-114.92 472.8,-114.92 478.8,-114.92 484.8,-120.92 484.8,-126.92 484.8,-126.92 484.8,-166.52 484.8,-166.52 484.8,-172.52 478.8,-178.52 472.8,-178.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"390.75\" y=\"-155.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">user_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"398.25\" y=\"-127.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- user_dataset.loader->user_dataset -->\n", |
| "<g id=\"edge5\" class=\"edge\">\n", |
| "<title>user_dataset.loader->user_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M319.74,-147.93C335.83,-147.76 352.72,-147.57 368.35,-147.4\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"368.01,-150.91 377.97,-147.3 367.93,-143.91 368.01,-150.91\"/>\n", |
| "</g>\n", |
| "<!-- transformed_user_dataset -->\n", |
| "<g id=\"node9\" class=\"node\">\n", |
| "<title>transformed_user_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M725.15,-177.52C725.15,-177.52 557.3,-177.52 557.3,-177.52 551.3,-177.52 545.3,-171.52 545.3,-165.52 545.3,-165.52 545.3,-125.92 545.3,-125.92 545.3,-119.92 551.3,-113.92 557.3,-113.92 557.3,-113.92 725.15,-113.92 725.15,-113.92 731.15,-113.92 737.15,-119.92 737.15,-125.92 737.15,-125.92 737.15,-165.52 737.15,-165.52 737.15,-171.52 731.15,-177.52 725.15,-177.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"556.1\" y=\"-154.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_user_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"607.1\" y=\"-126.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- user_dataset->transformed_user_dataset -->\n", |
| "<g id=\"edge11\" class=\"edge\">\n", |
| "<title>user_dataset->transformed_user_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M485.04,-146.48C499.96,-146.4 516.82,-146.32 533.78,-146.24\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"533.43,-149.74 543.41,-146.19 533.4,-142.74 533.43,-149.74\"/>\n", |
| "</g>\n", |
| "<!-- transformed_purchase_dataset -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>transformed_purchase_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M740.9,-92.52C740.9,-92.52 541.55,-92.52 541.55,-92.52 535.55,-92.52 529.55,-86.52 529.55,-80.52 529.55,-80.52 529.55,-40.92 529.55,-40.92 529.55,-34.92 535.55,-28.93 541.55,-28.93 541.55,-28.93 740.9,-28.93 740.9,-28.93 746.9,-28.93 752.9,-34.92 752.9,-40.92 752.9,-40.92 752.9,-80.53 752.9,-80.53 752.9,-86.52 746.9,-92.52 740.9,-92.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"540.35\" y=\"-69.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">transformed_purchase_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"607.1\" y=\"-41.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- purchase_dataset->transformed_purchase_dataset -->\n", |
| "<g id=\"edge7\" class=\"edge\">\n", |
| "<title>purchase_dataset->transformed_purchase_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M500.82,-56.68C506.42,-56.84 512.19,-57.01 518.04,-57.18\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"517.75,-60.67 527.85,-57.47 517.95,-53.68 517.75,-60.67\"/>\n", |
| "</g>\n", |
| "<!-- joined_dataset -->\n", |
| "<g id=\"node10\" class=\"node\">\n", |
| "<title>joined_dataset</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M920.75,-133.52C920.75,-133.52 828.65,-133.52 828.65,-133.52 822.65,-133.52 816.65,-127.52 816.65,-121.52 816.65,-121.52 816.65,-81.92 816.65,-81.92 816.65,-75.92 822.65,-69.92 828.65,-69.92 828.65,-69.92 920.75,-69.92 920.75,-69.92 926.75,-69.92 932.75,-75.92 932.75,-81.92 932.75,-81.92 932.75,-121.52 932.75,-121.52 932.75,-127.52 926.75,-133.52 920.75,-133.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"827.45\" y=\"-110.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">joined_dataset</text>\n", |
| "<text text-anchor=\"start\" x=\"840.57\" y=\"-82.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- transformed_purchase_dataset->joined_dataset -->\n", |
| "<g id=\"edge13\" class=\"edge\">\n", |
| "<title>transformed_purchase_dataset->joined_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M753.22,-80.38C770.94,-83.52 788.81,-86.69 805.17,-89.59\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"804.28,-92.98 814.74,-91.28 805.5,-86.09 804.28,-92.98\"/>\n", |
| "</g>\n", |
| "<!-- saved_file -->\n", |
| "<g id=\"node7\" class=\"node\">\n", |
| "<title>saved_file</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-140.89 1204.23,-144.45 1179.02,-144.45 1153.82,-144.45 1133.35,-140.89 1133.35,-136.5 1133.35,-136.5 1133.35,-64.95 1133.35,-64.95 1133.35,-60.56 1153.82,-57 1179.02,-57 1204.23,-57 1224.7,-60.56 1224.7,-64.95 1224.7,-64.95 1224.7,-136.5 1224.7,-136.5\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1224.7,-136.5C1224.7,-132.11 1204.23,-128.55 1179.02,-128.55 1153.82,-128.55 1133.35,-132.11 1133.35,-136.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"1146.4\" y=\"-109.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saved_file</text>\n", |
| "<text text-anchor=\"start\" x=\"1144.15\" y=\"-81.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">saved_file()</text>\n", |
| "</g>\n", |
| "<!-- fit_model -->\n", |
| "<g id=\"node8\" class=\"node\">\n", |
| "<title>fit_model</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1087.85,-96.52C1087.85,-96.52 1013,-96.52 1013,-96.52 1007,-96.52 1001,-90.52 1001,-84.52 1001,-84.52 1001,-44.92 1001,-44.92 1001,-38.92 1007,-32.92 1013,-32.92 1013,-32.92 1087.85,-32.92 1087.85,-32.92 1093.85,-32.92 1099.85,-38.92 1099.85,-44.92 1099.85,-44.92 1099.85,-84.53 1099.85,-84.53 1099.85,-90.52 1093.85,-96.52 1087.85,-96.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"1020.05\" y=\"-73.42\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_model</text>\n", |
| "<text text-anchor=\"start\" x=\"1011.8\" y=\"-45.42\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ModelObject</text>\n", |
| "</g>\n", |
| "<!-- fit_model->saved_file -->\n", |
| "<g id=\"edge8\" class=\"edge\">\n", |
| "<title>fit_model->saved_file</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1100.01,-78.54C1107.23,-80.59 1114.73,-82.73 1122.09,-84.82\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1121.07,-88.17 1131.65,-87.54 1122.98,-81.43 1121.07,-88.17\"/>\n", |
| "</g>\n", |
| "<!-- transformed_user_dataset->joined_dataset -->\n", |
| "<g id=\"edge12\" class=\"edge\">\n", |
| "<title>transformed_user_dataset->joined_dataset</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M737.38,-127.64C760.1,-123.32 783.92,-118.79 805.18,-114.75\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"805.71,-118.21 814.88,-112.91 804.4,-111.33 805.71,-118.21\"/>\n", |
| "</g>\n", |
| "<!-- joined_dataset->saved_to_db -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>joined_dataset->saved_to_db</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M922.71,-133.93C942.5,-147.48 965.8,-163.44 986.99,-177.96\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"984.76,-180.68 994.99,-183.44 988.72,-174.9 984.76,-180.68\"/>\n", |
| "</g>\n", |
| "<!-- joined_dataset->fit_model -->\n", |
| "<g id=\"edge10\" class=\"edge\">\n", |
| "<title>joined_dataset->fit_model</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M933.07,-89.51C951.15,-85.65 971.18,-81.39 989.44,-77.5\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"990.02,-80.95 999.07,-75.45 988.57,-74.11 990.02,-80.95\"/>\n", |
| "</g>\n", |
| "<!-- _purchase_dataset.loader_inputs -->\n", |
| "<g id=\"node11\" class=\"node\">\n", |
| "<title>_purchase_dataset.loader_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"125.6,-66.02 0,-66.02 0,-21.42 125.6,-21.42 125.6,-66.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"14.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n", |
| "<text text-anchor=\"start\" x=\"74.8\" y=\"-37.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n", |
| "</g>\n", |
| "<!-- _purchase_dataset.loader_inputs->purchase_dataset.loader -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>_purchase_dataset.loader_inputs->purchase_dataset.loader</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M126.03,-43.72C131.61,-43.72 137.34,-43.72 143.14,-43.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"142.81,-47.23 152.81,-43.73 142.81,-40.23 142.81,-47.23\"/>\n", |
| "</g>\n", |
| "<!-- _saved_to_db_inputs -->\n", |
| "<g id=\"node12\" class=\"node\">\n", |
| "<title>_saved_to_db_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"967.5,-253.52 781.9,-253.52 781.9,-187.93 967.5,-187.93 967.5,-253.52\"/>\n", |
| "<text text-anchor=\"start\" x=\"826.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">db_client</text>\n", |
| "<text text-anchor=\"start\" x=\"916.7\" y=\"-225.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">object</text>\n", |
| "<text text-anchor=\"start\" x=\"796.32\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">joined_table_name</text>\n", |
| "<text text-anchor=\"start\" x=\"927.2\" y=\"-204.42\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _saved_to_db_inputs->saved_to_db -->\n", |
| "<g id=\"edge3\" class=\"edge\">\n", |
| "<title>_saved_to_db_inputs->saved_to_db</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M967.9,-220.72C973.69,-220.72 979.44,-220.72 985.06,-220.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"984.86,-224.23 994.86,-220.73 984.86,-217.23 984.86,-224.23\"/>\n", |
| "</g>\n", |
| "<!-- _user_dataset.loader_inputs -->\n", |
| "<g id=\"node13\" class=\"node\">\n", |
| "<title>_user_dataset.loader_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"124.47,-171.02 1.12,-171.02 1.12,-126.42 124.47,-126.42 124.47,-171.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"15.92\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_ds_path</text>\n", |
| "<text text-anchor=\"start\" x=\"94.67\" y=\"-142.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _user_dataset.loader_inputs->user_dataset.loader -->\n", |
| "<g id=\"edge4\" class=\"edge\">\n", |
| "<title>_user_dataset.loader_inputs->user_dataset.loader</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M124.77,-148.72C135.64,-148.72 147.14,-148.72 158.54,-148.72\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"158.37,-152.23 168.37,-148.73 158.37,-145.23 158.37,-152.23\"/>\n", |
| "</g>\n", |
| "<!-- _saved_file_inputs -->\n", |
| "<g id=\"node14\" class=\"node\">\n", |
| "<title>_saved_file_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1101.22,-159.02 999.62,-159.02 999.62,-114.42 1101.22,-114.42 1101.22,-159.02\"/>\n", |
| "<text text-anchor=\"start\" x=\"1014.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">file_path</text>\n", |
| "<text text-anchor=\"start\" x=\"1071.42\" y=\"-130.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n", |
| "</g>\n", |
| "<!-- _saved_file_inputs->saved_file -->\n", |
| "<g id=\"edge9\" class=\"edge\">\n", |
| "<title>_saved_file_inputs->saved_file</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M1101.46,-122.5C1108.26,-120.56 1115.28,-118.57 1122.18,-116.61\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"1122.76,-120.08 1131.42,-113.98 1120.84,-113.35 1122.76,-120.08\"/>\n", |
| "</g>\n", |
| "<!-- input -->\n", |
| "<g id=\"node15\" class=\"node\">\n", |
| "<title>input</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"89.8,-226.02 35.8,-226.02 35.8,-189.43 89.8,-189.43 89.8,-226.02\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-201.92\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node16\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M85.22,-281.03C85.22,-281.03 40.37,-281.03 40.37,-281.03 34.37,-281.03 28.37,-275.03 28.37,-269.03 28.37,-269.03 28.37,-256.43 28.37,-256.43 28.37,-250.43 34.37,-244.43 40.37,-244.43 40.37,-244.43 85.22,-244.43 85.22,-244.43 91.22,-244.43 97.22,-250.43 97.22,-256.43 97.22,-256.43 97.22,-269.03 97.22,-269.03 97.22,-275.03 91.22,-281.03 85.22,-281.03\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-256.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "<!-- materializer -->\n", |
| "<g id=\"node17\" class=\"node\">\n", |
| "<title>materializer</title>\n", |
| "<path fill=\"#ffffff\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-338.29 88.21,-339.94 62.8,-339.94 37.39,-339.94 16.75,-338.29 16.75,-336.26 16.75,-336.26 16.75,-303.19 16.75,-303.19 16.75,-301.16 37.39,-299.51 62.8,-299.51 88.21,-299.51 108.85,-301.16 108.85,-303.19 108.85,-303.19 108.85,-336.26 108.85,-336.26\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M108.85,-336.26C108.85,-334.23 88.21,-332.59 62.8,-332.59 37.39,-332.59 16.75,-334.23 16.75,-336.26\"/>\n", |
| "<text text-anchor=\"middle\" x=\"62.8\" y=\"-313.93\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x157544e50>" |
| ] |
| }, |
| "execution_count": 5, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "from hamilton.plugins import h_openlineage\n", |
| "from hamilton import driver\n", |
| "\n", |
| "import pipeline\n", |
| "ola = h_openlineage.OpenLineageAdapter(client, \"demo_namespace\", \"my_hamilton_job\")\n", |
| "\n", |
| "# create the DAG\n", |
| "dr = driver.Builder().with_modules(pipeline).with_adapters(ola).build()\n", |
| "# display the graph\n", |
| "dr.display_all_functions()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "id": "ec295725", |
| "metadata": { |
| "ExecuteTime": { |
| "end_time": "2024-09-06T17:32:31.534973Z", |
| "start_time": "2024-09-06T17:32:31.475171Z" |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# create inputs to run the DAG\n", |
| "import sqlite3\n", |
| "db_client = sqlite3.connect(\"purchase_data.db\")\n", |
| "\n", |
| "# execute & emit lineage\n", |
| "result = dr.execute(\n", |
| " [\"saved_file\", \"saved_to_db\"],\n", |
| " inputs={\n", |
| " \"db_client\": db_client,\n", |
| " \"file_ds_path\": \"data.csv\",\n", |
| " \"file_path\": \"model.pkl\",\n", |
| " \"joined_table_name\": \"joined_data\",\n", |
| " },\n", |
| ")\n", |
| "# close the DB\n", |
| "db_client.close()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "id": "7b4ad693", |
| "metadata": {}, |
| "outputs": [], |
| "source": [] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.6" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 5 |
| } |