| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Execute this cell to install dependencies.\n", |
| "# `dlt[duckdb]` adds the duckdb driver required by the pipeline destination used below;\n", |
| "# polars and pyarrow are needed by the polars example and the pyarrow adapter.\n", |
| "%pip install sf-hamilton[visualization] dlt[duckdb] polars pyarrow" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
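| "# dlt plugin for Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dlt/dlt_plugin.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/dagworks-inc/hamilton/blob/main/examples/dlt/dlt_plugin.ipynb)\n", |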
| "\n", |
| "This notebook shows how to use Hamilton [materializers](https://hamilton.dagworks.io/en/latest/concepts/materialization/) to move data between Hamilton and dlt.\n", |
| "\n", |
| "Content:\n", |
| "1. Defining an illustrative Hamilton dataflow\n", |
| "2. `DataSaver`: save Hamilton results to a [dlt Destination](https://dlthub.com/docs/dlt-ecosystem/destinations/)\n", |
| "3. `DataLoader`: load data from a [dlt Resource](https://dlthub.com/docs/dlt-ecosystem/verified-sources/) (a single table from a Source) into a Hamilton node" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "%load_ext hamilton.plugins.jupyter_magic\n", |
| "\n", |
| "import dlt\n", |
| "from hamilton import driver\n", |
| "from hamilton.io.materialization import to, from_\n", |
| "from hamilton.plugins import dlt_extensions" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 2.43.0 (0)\n", |
| " -->\n", |
| "<!-- Title: %3 Pages: 1 -->\n", |
| "<svg width=\"334pt\" height=\"367pt\"\n", |
| " viewBox=\"0.00 0.00 334.00 367.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 363)\">\n", |
| "<title>%3</title>\n", |
| "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-363 330,-363 330,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"36.5,-219 36.5,-351 132.5,-351 132.5,-219 36.5,-219\"/>\n", |
| "<text text-anchor=\"middle\" x=\"84.5\" y=\"-335.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- table -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>table</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M122,-64C122,-64 47,-64 47,-64 41,-64 35,-58 35,-52 35,-52 35,-12 35,-12 35,-6 41,0 47,0 47,0 122,0 122,0 128,0 134,-6 134,-12 134,-12 134,-52 134,-52 134,-58 128,-64 122,-64\"/>\n", |
| "<text text-anchor=\"start\" x=\"64\" y=\"-42.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">table</text>\n", |
| "<text text-anchor=\"start\" x=\"46\" y=\"-14.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- polars_table -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>polars_table</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M132,-146C132,-146 37,-146 37,-146 31,-146 25,-140 25,-134 25,-134 25,-94 25,-94 25,-88 31,-82 37,-82 37,-82 132,-82 132,-82 138,-82 144,-88 144,-94 144,-94 144,-134 144,-134 144,-140 138,-146 132,-146\"/>\n", |
| "<text text-anchor=\"start\" x=\"36\" y=\"-124.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">polars_table</text>\n", |
| "<text text-anchor=\"start\" x=\"46\" y=\"-96.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- print_df_head -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>print_df_head</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M314,-219C314,-219 210,-219 210,-219 204,-219 198,-213 198,-207 198,-207 198,-167 198,-167 198,-161 204,-155 210,-155 210,-155 314,-155 314,-155 320,-155 326,-161 326,-167 326,-167 326,-207 326,-207 326,-213 320,-219 314,-219\"/>\n", |
| "<text text-anchor=\"start\" x=\"209\" y=\"-197.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">print_df_head</text>\n", |
| "<text text-anchor=\"start\" x=\"223.5\" y=\"-169.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- _print_df_head_inputs -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>_print_df_head_inputs</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"169,-209.5 0,-209.5 0,-164.5 169,-164.5 169,-209.5\"/>\n", |
| "<text text-anchor=\"start\" x=\"15.5\" y=\"-182.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">external</text>\n", |
| "<text text-anchor=\"start\" x=\"78.5\" y=\"-182.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- _print_df_head_inputs->print_df_head -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>_print_df_head_inputs->print_df_head</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M169.14,-187C175.36,-187 181.61,-187 187.76,-187\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"188,-190.5 198,-187 188,-183.5 188,-190.5\"/>\n", |
| "</g>\n", |
| "<!-- input -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>input</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"114,-319.5 55,-319.5 55,-282.5 114,-282.5 114,-319.5\"/>\n", |
| "<text text-anchor=\"middle\" x=\"84.5\" y=\"-297.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M112.5,-264.5C112.5,-264.5 56.5,-264.5 56.5,-264.5 50.5,-264.5 44.5,-258.5 44.5,-252.5 44.5,-252.5 44.5,-239.5 44.5,-239.5 44.5,-233.5 50.5,-227.5 56.5,-227.5 56.5,-227.5 112.5,-227.5 112.5,-227.5 118.5,-227.5 124.5,-233.5 124.5,-239.5 124.5,-239.5 124.5,-252.5 124.5,-252.5 124.5,-258.5 118.5,-264.5 112.5,-264.5\"/>\n", |
| "<text text-anchor=\"middle\" x=\"84.5\" y=\"-242.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x7f47dee61360>" |
| ] |
| }, |
| "execution_count": 2, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "%%cell_to_module -m my_module -d\n", |
| "import pandas as pd\n", |
| "import polars as pl\n", |
| "\n", |
| "def table() -> pd.DataFrame:\n", |
| "    \"\"\"Return a two-row pandas DataFrame with a single integer column `C`.\"\"\"\n", |
| "    rows = [{\"C\": value} for value in (1, 2)]\n", |
| "    return pd.DataFrame(rows)\n", |
| "\n", |
| "def polars_table() -> pl.DataFrame:\n", |
| "    \"\"\"Return a two-row polars DataFrame with a single integer column `C`.\"\"\"\n", |
| "    rows = [{\"C\": value} for value in (1, 2)]\n", |
| "    return pl.DataFrame(rows)\n", |
| "\n", |
| "def print_df_head(external: pd.DataFrame) -> pd.DataFrame:\n", |
| "    \"\"\"Print the first rows of `external` and pass the frame through unchanged.\n", |
| "\n", |
| "    `external` is not produced by any function in this module, so it must be\n", |
| "    supplied when the dataflow is executed.\n", |
| "    \"\"\"\n", |
| "    print(\"from print_df_head:\\n\", external.head())\n", |
| "    # Return the frame so the node honors its declared return type instead of\n", |
| "    # implicitly returning None.\n", |
| "    return external" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Assemble a Hamilton driver whose dataflow comes from `my_module`.\n", |
| "dr = (\n", |
| "    driver.Builder()\n", |
| "    .with_modules(my_module)\n", |
| "    .build()\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## DataSaver\n", |
| "With \"Extract, Transform, Load\" (ETL) as a frame of reference, the Hamilton dataflow is responsible for Transform and the `DltDestinationSaver` materializer for Load.\n", |
| "\n", |
| "\n", |
| "Start by defining a dlt `Pipeline` that uses your chosen dlt Destination. This is regular dlt code that you will pass to Hamilton." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Regular dlt pipeline object; it is passed to the Hamilton data savers below.\n", |
| "saver_pipeline = dlt.pipeline(\n", |
| "    pipeline_name=\"saver_pipe\",\n", |
| "    destination=\"duckdb\",\n", |
| ")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "### Single dependency\n", |
| "Define the materializer with `to.dlt()`; the example below shows the required arguments. Specify an `id` for the materializer and set `dependencies` to a list containing the name of a single Hamilton node. Then, specify a `table_name` for the destination and pass the `pipeline`.\n", |
| "\n", |
| "The [other keyword arguments](https://dlthub.com/docs/api_reference/pipeline/__init__#run) for `dlt.pipeline.run()` are accepted and allow specifying incremental loading, table schema annotation, and more." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "{'dlt_metadata': {'pipeline': {'pipeline_name': 'saver_pipe'}, 'metrics': [{'started_at': DateTime(2024, 4, 17, 20, 22, 7, 283298, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 4, 17, 20, 22, 7, 453053, tzinfo=Timezone('UTC')), 'load_id': '1713385326.9071813'}], 'destination_type': 'dlt.destinations.duckdb', 'destination_displayable_credentials': 'duckdb:////home/tjean/projects/dagworks/hamilton/examples/dlt/saver_pipe.duckdb', 'destination_name': 'duckdb', 'environment': None, 'staging_type': None, 'staging_name': None, 'staging_displayable_credentials': None, 'destination_fingerprint': '', 'dataset_name': 'saver_pipe_dataset', 'loads_ids': ['1713385326.9071813'], 'load_packages': [{'load_id': '1713385326.9071813', 'package_path': '/home/tjean/.dlt/pipelines/saver_pipe/load/loaded/1713385326.9071813', 'state': 'loaded', 'completed_at': DateTime(2024, 4, 17, 20, 22, 7, 435481, tzinfo=Timezone('UTC')), 'jobs': [{'state': 'completed_jobs', 'file_path': '/home/tjean/.dlt/pipelines/saver_pipe/load/loaded/1713385326.9071813/completed_jobs/my_table.777bd2e418.0.parquet', 'file_size': 574, 'created_at': DateTime(2024, 4, 17, 20, 22, 6, 915481, tzinfo=Timezone('UTC')), 'elapsed': 0.5199999809265137, 'failed_message': None, 'table_name': 'my_table', 'file_id': '777bd2e418', 'retry_count': 0, 'file_format': 'parquet'}], 'schema_hash': 'UE8l1iVz3xnHM+zYpjm8Bqd+3m6rDG++zNubWIUyecg=', 'schema_name': 'saver_pipe', 'tables': []}], 'first_run': False, 'started_at': DateTime(2024, 4, 17, 20, 22, 7, 283298, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 4, 17, 20, 22, 7, 453053, tzinfo=Timezone('UTC'))}}\n" |
| ] |
| }, |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 2.43.0 (0)\n", |
| " -->\n", |
| "<!-- Title: %3 Pages: 1 -->\n", |
| "<svg width=\"319pt\" height=\"291pt\"\n", |
| " viewBox=\"0.00 0.00 319.00 291.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 287)\">\n", |
| "<title>%3</title>\n", |
| "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-287 315,-287 315,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8,-86 8,-275 129,-275 129,-86 8,-86\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-259.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- table -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>table</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M106,-76C106,-76 31,-76 31,-76 25,-76 19,-70 19,-64 19,-64 19,-24 19,-24 19,-18 25,-12 31,-12 31,-12 106,-12 106,-12 112,-12 118,-18 118,-24 118,-24 118,-64 118,-64 118,-70 112,-76 106,-76\"/>\n", |
| "<text text-anchor=\"start\" x=\"48\" y=\"-54.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">table</text>\n", |
| "<text text-anchor=\"start\" x=\"30\" y=\"-26.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- saver_node -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>saver_node</title>\n", |
| "<path fill=\"#ffc857\" stroke=\"black\" d=\"M311,-80C311,-84.41 274.92,-88 230.5,-88 186.08,-88 150,-84.41 150,-80 150,-80 150,-8 150,-8 150,-3.59 186.08,0 230.5,0 274.92,0 311,-3.59 311,-8 311,-8 311,-80 311,-80\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M311,-80C311,-75.59 274.92,-72 230.5,-72 186.08,-72 150,-75.59 150,-80\"/>\n", |
| "<text text-anchor=\"start\" x=\"185.5\" y=\"-54.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">saver_node</text>\n", |
| "<text text-anchor=\"start\" x=\"161\" y=\"-26.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DltDestinationSaver</text>\n", |
| "</g>\n", |
| "<!-- table->saver_node -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>table->saver_node</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M118.37,-44C125.26,-44 132.51,-44 139.87,-44\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"139.96,-47.5 149.96,-44 139.96,-40.5 139.96,-47.5\"/>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M96.5,-243.5C96.5,-243.5 40.5,-243.5 40.5,-243.5 34.5,-243.5 28.5,-237.5 28.5,-231.5 28.5,-231.5 28.5,-218.5 28.5,-218.5 28.5,-212.5 34.5,-206.5 40.5,-206.5 40.5,-206.5 96.5,-206.5 96.5,-206.5 102.5,-206.5 108.5,-212.5 108.5,-218.5 108.5,-218.5 108.5,-231.5 108.5,-231.5 108.5,-237.5 102.5,-243.5 96.5,-243.5\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-221.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "<!-- output -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>output</title>\n", |
| "<path fill=\"#ffc857\" stroke=\"black\" d=\"M90.5,-188.5C90.5,-188.5 46.5,-188.5 46.5,-188.5 40.5,-188.5 34.5,-182.5 34.5,-176.5 34.5,-176.5 34.5,-163.5 34.5,-163.5 34.5,-157.5 40.5,-151.5 46.5,-151.5 46.5,-151.5 90.5,-151.5 90.5,-151.5 96.5,-151.5 102.5,-157.5 102.5,-163.5 102.5,-163.5 102.5,-176.5 102.5,-176.5 102.5,-182.5 96.5,-188.5 90.5,-188.5\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-166.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">output</text>\n", |
| "</g>\n", |
| "<!-- materializer -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>materializer</title>\n", |
| "<path fill=\"#ffffff\" stroke=\"black\" d=\"M121,-130.26C121,-132.26 97.47,-133.88 68.5,-133.88 39.53,-133.88 16,-132.26 16,-130.26 16,-130.26 16,-97.74 16,-97.74 16,-95.74 39.53,-94.12 68.5,-94.12 97.47,-94.12 121,-95.74 121,-97.74 121,-97.74 121,-130.26 121,-130.26\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M121,-130.26C121,-128.27 97.47,-126.65 68.5,-126.65 39.53,-126.65 16,-128.27 16,-130.26\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-110.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x7f47a81ebe80>" |
| ] |
| }, |
| "execution_count": 5, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Saver that loads the output of the `table` node into `my_table` via the dlt pipeline.\n", |
| "table_saver = to.dlt(\n", |
| "    id=\"saver_node\",\n", |
| "    dependencies=[\"table\"],\n", |
| "    table_name=\"my_table\",\n", |
| "    pipeline=saver_pipeline,\n", |
| ")\n", |
| "materializers = [table_saver]\n", |
| "\n", |
| "# The first returned item is indexed by saver id and holds that saver's metadata.\n", |
| "results, _ = dr.materialize(*materializers)\n", |
| "print(results[\"saver_node\"])\n", |
| "\n", |
| "# Last expression of the cell: renders the materialization graph.\n", |
| "dr.visualize_materialization(*materializers)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "### Alternative dataframe libraries\n", |
| "By default, dlt only supports a Python `Iterable` of records (e.g., JSON objects), pandas (`pd.DataFrame`), and pyarrow (`pyarrow.Table`, `pyarrow.RecordBatch`) objects. To save a polars, dask, vaex, velox, or duckdb object, you need to convert it to a supported type first.\n", |
| "\n", |
| "Hamilton provides adapters to make this easy! Simply pass the adapter to the `combine=` keyword of the data saver." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "{'dlt_metadata': {'pipeline': {'pipeline_name': 'saver_pipe'}, 'metrics': [{'started_at': DateTime(2024, 4, 17, 20, 22, 24, 280884, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 4, 17, 20, 22, 24, 447750, tzinfo=Timezone('UTC')), 'load_id': '1713385343.9070144'}], 'destination_type': 'dlt.destinations.duckdb', 'destination_displayable_credentials': 'duckdb:////home/tjean/projects/dagworks/hamilton/examples/dlt/saver_pipe.duckdb', 'destination_name': 'duckdb', 'environment': None, 'staging_type': None, 'staging_name': None, 'staging_displayable_credentials': None, 'destination_fingerprint': '', 'dataset_name': 'saver_pipe_dataset', 'loads_ids': ['1713385343.9070144'], 'load_packages': [{'load_id': '1713385343.9070144', 'package_path': '/home/tjean/.dlt/pipelines/saver_pipe/load/loaded/1713385343.9070144', 'state': 'loaded', 'completed_at': DateTime(2024, 4, 17, 20, 22, 24, 425481, tzinfo=Timezone('UTC')), 'jobs': [{'state': 'completed_jobs', 'file_path': '/home/tjean/.dlt/pipelines/saver_pipe/load/loaded/1713385343.9070144/completed_jobs/my_polars_table.a4e2d05d46.0.parquet', 'file_size': 574, 'created_at': DateTime(2024, 4, 17, 20, 22, 23, 915481, tzinfo=Timezone('UTC')), 'elapsed': 0.5099999904632568, 'failed_message': None, 'table_name': 'my_polars_table', 'file_id': 'a4e2d05d46', 'retry_count': 0, 'file_format': 'parquet'}], 'schema_hash': '4ezuw/Ke94mRLdyi/MbomA4EPL+AciFUjmfshpA07dU=', 'schema_name': 'saver_pipe', 'tables': []}], 'first_run': False, 'started_at': DateTime(2024, 4, 17, 20, 22, 24, 280884, tzinfo=Timezone('UTC')), 'finished_at': DateTime(2024, 4, 17, 20, 22, 24, 447750, tzinfo=Timezone('UTC'))}}\n" |
| ] |
| }, |
| { |
| "data": { |
| "image/svg+xml": [ |
| "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", |
| "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", |
| " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", |
| "<!-- Generated by graphviz version 2.43.0 (0)\n", |
| " -->\n", |
| "<!-- Title: %3 Pages: 1 -->\n", |
| "<svg width=\"626pt\" height=\"291pt\"\n", |
| " viewBox=\"0.00 0.00 626.00 291.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", |
| "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 287)\">\n", |
| "<title>%3</title>\n", |
| "<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-287 622,-287 622,4 -4,4\"/>\n", |
| "<g id=\"clust1\" class=\"cluster\">\n", |
| "<title>cluster__legend</title>\n", |
| "<polygon fill=\"#ffffff\" stroke=\"black\" points=\"8,-86 8,-275 129,-275 129,-86 8,-86\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-259.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n", |
| "</g>\n", |
| "<!-- polars_saver_node_build_result -->\n", |
| "<g id=\"node1\" class=\"node\">\n", |
| "<title>polars_saver_node_build_result</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M410,-76C410,-76 169,-76 169,-76 163,-76 157,-70 157,-64 157,-64 157,-24 157,-24 157,-18 163,-12 169,-12 169,-12 410,-12 410,-12 416,-12 422,-18 422,-24 422,-24 422,-64 422,-64 422,-70 416,-76 410,-76\"/>\n", |
| "<text text-anchor=\"start\" x=\"168\" y=\"-54.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">polars_saver_node_build_result</text>\n", |
| "<text text-anchor=\"start\" x=\"271\" y=\"-26.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Table</text>\n", |
| "</g>\n", |
| "<!-- polars_saver_node -->\n", |
| "<g id=\"node3\" class=\"node\">\n", |
| "<title>polars_saver_node</title>\n", |
| "<path fill=\"#ffc857\" stroke=\"black\" d=\"M618,-80C618,-84.41 580.57,-88 534.5,-88 488.43,-88 451,-84.41 451,-80 451,-80 451,-8 451,-8 451,-3.59 488.43,0 534.5,0 580.57,0 618,-3.59 618,-8 618,-8 618,-80 618,-80\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M618,-80C618,-75.59 580.57,-72 534.5,-72 488.43,-72 451,-75.59 451,-80\"/>\n", |
| "<text text-anchor=\"start\" x=\"462\" y=\"-54.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">polars_saver_node</text>\n", |
| "<text text-anchor=\"start\" x=\"465\" y=\"-26.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DltDestinationSaver</text>\n", |
| "</g>\n", |
| "<!-- polars_saver_node_build_result->polars_saver_node -->\n", |
| "<g id=\"edge2\" class=\"edge\">\n", |
| "<title>polars_saver_node_build_result->polars_saver_node</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M422.31,-44C428.53,-44 434.71,-44 440.79,-44\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"440.92,-47.5 450.92,-44 440.92,-40.5 440.92,-47.5\"/>\n", |
| "</g>\n", |
| "<!-- polars_table -->\n", |
| "<g id=\"node2\" class=\"node\">\n", |
| "<title>polars_table</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M116,-76C116,-76 21,-76 21,-76 15,-76 9,-70 9,-64 9,-64 9,-24 9,-24 9,-18 15,-12 21,-12 21,-12 116,-12 116,-12 122,-12 128,-18 128,-24 128,-24 128,-64 128,-64 128,-70 122,-76 116,-76\"/>\n", |
| "<text text-anchor=\"start\" x=\"20\" y=\"-54.8\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">polars_table</text>\n", |
| "<text text-anchor=\"start\" x=\"30\" y=\"-26.8\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n", |
| "</g>\n", |
| "<!-- polars_table->polars_saver_node_build_result -->\n", |
| "<g id=\"edge1\" class=\"edge\">\n", |
| "<title>polars_table->polars_saver_node_build_result</title>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M128.19,-44C134.09,-44 140.25,-44 146.56,-44\"/>\n", |
| "<polygon fill=\"black\" stroke=\"black\" points=\"146.6,-47.5 156.6,-44 146.6,-40.5 146.6,-47.5\"/>\n", |
| "</g>\n", |
| "<!-- function -->\n", |
| "<g id=\"node4\" class=\"node\">\n", |
| "<title>function</title>\n", |
| "<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M96.5,-243.5C96.5,-243.5 40.5,-243.5 40.5,-243.5 34.5,-243.5 28.5,-237.5 28.5,-231.5 28.5,-231.5 28.5,-218.5 28.5,-218.5 28.5,-212.5 34.5,-206.5 40.5,-206.5 40.5,-206.5 96.5,-206.5 96.5,-206.5 102.5,-206.5 108.5,-212.5 108.5,-218.5 108.5,-218.5 108.5,-231.5 108.5,-231.5 108.5,-237.5 102.5,-243.5 96.5,-243.5\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-221.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n", |
| "</g>\n", |
| "<!-- output -->\n", |
| "<g id=\"node5\" class=\"node\">\n", |
| "<title>output</title>\n", |
| "<path fill=\"#ffc857\" stroke=\"black\" d=\"M90.5,-188.5C90.5,-188.5 46.5,-188.5 46.5,-188.5 40.5,-188.5 34.5,-182.5 34.5,-176.5 34.5,-176.5 34.5,-163.5 34.5,-163.5 34.5,-157.5 40.5,-151.5 46.5,-151.5 46.5,-151.5 90.5,-151.5 90.5,-151.5 96.5,-151.5 102.5,-157.5 102.5,-163.5 102.5,-163.5 102.5,-176.5 102.5,-176.5 102.5,-182.5 96.5,-188.5 90.5,-188.5\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-166.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">output</text>\n", |
| "</g>\n", |
| "<!-- materializer -->\n", |
| "<g id=\"node6\" class=\"node\">\n", |
| "<title>materializer</title>\n", |
| "<path fill=\"#ffffff\" stroke=\"black\" d=\"M121,-130.26C121,-132.26 97.47,-133.88 68.5,-133.88 39.53,-133.88 16,-132.26 16,-130.26 16,-130.26 16,-97.74 16,-97.74 16,-95.74 39.53,-94.12 68.5,-94.12 97.47,-94.12 121,-95.74 121,-97.74 121,-97.74 121,-130.26 121,-130.26\"/>\n", |
| "<path fill=\"none\" stroke=\"black\" d=\"M121,-130.26C121,-128.27 97.47,-126.65 68.5,-126.65 39.53,-126.65 16,-128.27 16,-130.26\"/>\n", |
| "<text text-anchor=\"middle\" x=\"68.5\" y=\"-110.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">materializer</text>\n", |
| "</g>\n", |
| "</g>\n", |
| "</svg>\n" |
| ], |
| "text/plain": [ |
| "<graphviz.graphs.Digraph at 0x7f4799119b10>" |
| ] |
| }, |
| "execution_count": 7, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "from hamilton.plugins import h_pyarrow\n", |
| "\n", |
| "# `combine=` applies the pyarrow adapter to the polars node before it is handed to dlt.\n", |
| "polars_saver = to.dlt(\n", |
| "    id=\"polars_saver_node\",\n", |
| "    dependencies=[\"polars_table\"],\n", |
| "    combine=h_pyarrow.PyarrowTableResult(),\n", |
| "    table_name=\"my_polars_table\",\n", |
| "    pipeline=saver_pipeline,\n", |
| ")\n", |
| "materializers = [polars_saver]\n", |
| "\n", |
| "results, _ = dr.materialize(*materializers)\n", |
| "print(results[\"polars_saver_node\"])\n", |
| "\n", |
| "# Last expression of the cell: renders the materialization graph.\n", |
| "dr.visualize_materialization(*materializers)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## DataLoader\n", |
| "With ETL as a frame of reference, the `DataLoader` uses dlt to run the \"Extract\" step for the passed dlt `Resource`. \n", |
| "\n", |
| "Internally, it creates a temporary dlt Pipeline to run the extract and normalize steps, then reads the resulting files into memory. The temporary dlt Pipeline is deleted afterward." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 8, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Stand-in dlt Source used only for this demo.\n", |
| "@dlt.source\n", |
| "def mock_source():\n", |
| "    \"\"\"Yield a single dlt resource, `mock_resource`, backed by in-memory records.\"\"\"\n", |
| "    records = [{\"col\": 1}, {\"col\": 2}, {\"col\": 3}] * 100\n", |
| "\n", |
| "    @dlt.resource\n", |
| "    def mock_resource():\n", |
| "        for record in records:\n", |
| "            yield record\n", |
| "\n", |
| "    yield mock_resource\n", |
| "\n", |
| "\n", |
| "my_mock_source = mock_source()" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "### Single resource\n", |
| "To define the materializer, give it a `target` Hamilton node and pass a dlt Resource to `resource`. When working with a dlt Source, you can access individual resources via the dictionary `Source.resources[RESOURCE_NAME]`." |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 9, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "from print_df_head:\n", |
| " col _dlt_load_id _dlt_id\n", |
| "0 1 1713385353.1057432 nV52FbDDaG8Hng\n", |
| "1 2 1713385353.1057432 1PnuRBfd/pFmbg\n", |
| "2 3 1713385353.1057432 E29IvCLX2o0hBw\n", |
| "3 1 1713385353.1057432 PHnW5pOvp3WRmA\n", |
| "4 2 1713385353.1057432 oRTCJeKpMP2OCQ\n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Loader that feeds the `mock_resource` data into the `external` input of the dataflow.\n", |
| "mock_resource = my_mock_source.resources[\"mock_resource\"]\n", |
| "materializers = [from_.dlt(target=\"external\", resource=mock_resource)]\n", |
| "\n", |
| "# `print_df_head` is requested explicitly so the node consuming `external` is executed.\n", |
| "metadata, _ = dr.materialize(*materializers, additional_vars=[\"print_df_head\"])" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "venv", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.10.9" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 2 |
| } |