| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# Lineage Code Snippets\n", |
| "This notebook contains code snippets that you can use for lineage purposes. For demonstration, it uses Hamilton code that operates over the Titanic dataset." |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "outputs": [], |
| "source": [ |
| "from hamilton import base\n", |
| "from hamilton import driver\n", |
| "\n", |
| "# modules that house the Hamilton code we build the lineage/DAG from.\n", |
| "import data_loading\n", |
| "import features\n", |
| "import model_pipeline\n", |
| "import sets\n" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:19.946421Z", |
| "start_time": "2023-11-07T23:50:14.877286Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Determine configuration for creating the DAG.\n", |
| "config = {} # This example has no configuration that changes the DAG/lineage shape.\n", |
| "# instantiate the driver\n", |
| "adapter = base.DefaultAdapter()\n", |
| "dr = driver.Driver(config, data_loading, features, sets, model_pipeline, adapter=adapter)\n" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:19.981600Z", |
| "start_time": "2023-11-07T23:50:19.955048Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# Display everything\n", |
| "Useful for a global overview." |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"1544pt\" height=\"1091pt\"\n viewBox=\"0.00 0.00 1543.90 1090.60\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 1086.6)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-1086.6 1539.9,-1086.6 1539.9,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"11.38,-601.8 11.38,-731.8 96.22,-731.8 96.22,-601.8 11.38,-601.8\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-714.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- embarked_category -->\n<g id=\"node1\" class=\"node\">\n<title>embarked_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M718.5,-279.6C718.5,-279.6 592.65,-279.6 592.65,-279.6 586.65,-279.6 580.65,-273.6 580.65,-267.6 580.65,-267.6 580.65,-228 580.65,-228 580.65,-222 586.65,-216 592.65,-216 592.65,-216 718.5,-216 718.5,-216 724.5,-216 730.5,-222 730.5,-228 730.5,-228 730.5,-267.6 730.5,-267.6 730.5,-273.6 724.5,-279.6 718.5,-279.6\"/>\n<text text-anchor=\"start\" x=\"591.45\" y=\"-256.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_category</text>\n<text text-anchor=\"start\" x=\"636.45\" y=\"-228.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- training_set_v1 -->\n<g id=\"node17\" class=\"node\">\n<title>training_set_v1</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M908.1,-361.6C908.1,-361.6 811.5,-361.6 811.5,-361.6 805.5,-361.6 799.5,-355.6 799.5,-349.6 799.5,-349.6 799.5,-310 799.5,-310 799.5,-304 805.5,-298 
811.5,-298 811.5,-298 908.1,-298 908.1,-298 914.1,-298 920.1,-304 920.1,-310 920.1,-310 920.1,-349.6 920.1,-349.6 920.1,-355.6 914.1,-361.6 908.1,-361.6\"/>\n<text text-anchor=\"start\" x=\"810.3\" y=\"-338.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_set_v1</text>\n<text text-anchor=\"start\" x=\"825.67\" y=\"-310.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- embarked_category->training_set_v1 -->\n<g id=\"edge30\" class=\"edge\">\n<title>embarked_category->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M730.73,-277.87C749.84,-285.62 770.33,-293.93 789.2,-301.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"787.62,-305.12 798.2,-305.63 790.25,-298.63 787.62,-305.12\"/>\n</g>\n<!-- cabin_t -->\n<g id=\"node2\" class=\"node\">\n<title>cabin_t</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M501.02,-530.6C501.02,-530.6 456.17,-530.6 456.17,-530.6 450.17,-530.6 444.17,-524.6 444.17,-518.6 444.17,-518.6 444.17,-479 444.17,-479 444.17,-473 450.17,-467 456.17,-467 456.17,-467 501.02,-467 501.02,-467 507.02,-467 513.02,-473 513.02,-479 513.02,-479 513.02,-518.6 513.02,-518.6 513.02,-524.6 507.02,-530.6 501.02,-530.6\"/>\n<text text-anchor=\"start\" x=\"454.97\" y=\"-507.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_t</text>\n<text text-anchor=\"start\" x=\"459.47\" y=\"-479.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- target -->\n<g id=\"node3\" class=\"node\">\n<title>target</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-1082.6C343.92,-1082.6 308.07,-1082.6 308.07,-1082.6 302.07,-1082.6 296.07,-1076.6 296.07,-1070.6 296.07,-1070.6 296.07,-1031 296.07,-1031 296.07,-1025 302.07,-1019 308.07,-1019 308.07,-1019 343.92,-1019 343.92,-1019 349.92,-1019 355.92,-1025 355.92,-1031 355.92,-1031 355.92,-1070.6 
355.92,-1070.6 355.92,-1076.6 349.92,-1082.6 343.92,-1082.6\"/>\n<text text-anchor=\"start\" x=\"307.25\" y=\"-1059.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">target</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-1031.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func -->\n<g id=\"node10\" class=\"node\">\n<title>train_test_split_func</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1150.7,-361.6C1150.7,-361.6 1021.1,-361.6 1021.1,-361.6 1015.1,-361.6 1009.1,-355.6 1009.1,-349.6 1009.1,-349.6 1009.1,-310 1009.1,-310 1009.1,-304 1015.1,-298 1021.1,-298 1021.1,-298 1150.7,-298 1150.7,-298 1156.7,-298 1162.7,-304 1162.7,-310 1162.7,-310 1162.7,-349.6 1162.7,-349.6 1162.7,-355.6 1156.7,-361.6 1150.7,-361.6\"/>\n<text text-anchor=\"start\" x=\"1019.9\" y=\"-338.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">train_test_split_func</text>\n<text text-anchor=\"start\" x=\"1075.4\" y=\"-310.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- target->train_test_split_func -->\n<g id=\"edge15\" class=\"edge\">\n<title>target->train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.23,-1051.85C401.43,-1052.14 489.54,-1047.3 551.65,-1009.8 821.8,-846.71 1008.01,-492.57 1065.84,-371.42\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1069.3,-373.3 1070.41,-362.76 1062.97,-370.3 1069.3,-373.3\"/>\n</g>\n<!-- titanic_data -->\n<g id=\"node4\" class=\"node\">\n<title>titanic_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M227.82,-590.6C227.82,-590.6 155.22,-590.6 155.22,-590.6 149.22,-590.6 143.22,-584.6 143.22,-578.6 143.22,-578.6 143.22,-539 143.22,-539 143.22,-533 149.22,-527 155.22,-527 155.22,-527 227.82,-527 227.82,-527 233.82,-527 239.82,-533 239.82,-539 239.82,-539 239.82,-578.6 239.82,-578.6 239.82,-584.6 233.82,-590.6 
227.82,-590.6\"/>\n<text text-anchor=\"start\" x=\"154.02\" y=\"-567.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">titanic_data</text>\n<text text-anchor=\"start\" x=\"157.4\" y=\"-539.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- titanic_data->target -->\n<g id=\"edge4\" class=\"edge\">\n<title>titanic_data->target</title>\n<path fill=\"none\" stroke=\"black\" d=\"M195.56,-590.92C204.58,-681.63 233.24,-937.88 275.45,-1009.8 278.67,-1015.29 282.92,-1020.37 287.57,-1024.95\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.88,-1027.13 294.67,-1031.16 289.53,-1021.9 284.88,-1027.13\"/>\n</g>\n<!-- parch -->\n<g id=\"node6\" class=\"node\">\n<title>parch</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-672.6C343.92,-672.6 308.07,-672.6 308.07,-672.6 302.07,-672.6 296.07,-666.6 296.07,-660.6 296.07,-660.6 296.07,-621 296.07,-621 296.07,-615 302.07,-609 308.07,-609 308.07,-609 343.92,-609 343.92,-609 349.92,-609 355.92,-615 355.92,-621 355.92,-621 355.92,-660.6 355.92,-660.6 355.92,-666.6 349.92,-672.6 343.92,-672.6\"/>\n<text text-anchor=\"start\" x=\"307.62\" y=\"-649.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">parch</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-621.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->parch -->\n<g id=\"edge8\" class=\"edge\">\n<title>titanic_data->parch</title>\n<path fill=\"none\" stroke=\"black\" d=\"M239.98,-588.17C255.2,-597.6 271.87,-607.91 286.39,-616.9\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.23,-620.3 294.58,-622.59 287.91,-614.35 284.23,-620.3\"/>\n</g>\n<!-- name -->\n<g id=\"node13\" class=\"node\">\n<title>name</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-63.6C343.92,-63.6 308.07,-63.6 308.07,-63.6 302.07,-63.6 296.07,-57.6 296.07,-51.6 296.07,-51.6 
296.07,-12 296.07,-12 296.07,-6 302.07,0 308.07,0 308.07,0 343.92,0 343.92,0 349.92,0 355.92,-6 355.92,-12 355.92,-12 355.92,-51.6 355.92,-51.6 355.92,-57.6 349.92,-63.6 343.92,-63.6\"/>\n<text text-anchor=\"start\" x=\"308\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">name</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->name -->\n<g id=\"edge21\" class=\"edge\">\n<title>titanic_data->name</title>\n<path fill=\"none\" stroke=\"black\" d=\"M195.09,-526.61C203.16,-431.1 230.19,-151.07 275.45,-72.8 278.64,-67.29 282.86,-62.2 287.5,-57.62\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"289.47,-60.66 294.6,-51.4 284.81,-55.44 289.47,-60.66\"/>\n</g>\n<!-- pclass -->\n<g id=\"node14\" class=\"node\">\n<title>pclass</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M345.8,-145.6C345.8,-145.6 306.2,-145.6 306.2,-145.6 300.2,-145.6 294.2,-139.6 294.2,-133.6 294.2,-133.6 294.2,-94 294.2,-94 294.2,-88 300.2,-82 306.2,-82 306.2,-82 345.8,-82 345.8,-82 351.8,-82 357.8,-88 357.8,-94 357.8,-94 357.8,-133.6 357.8,-133.6 357.8,-139.6 351.8,-145.6 345.8,-145.6\"/>\n<text text-anchor=\"start\" x=\"305\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">pclass</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->pclass -->\n<g id=\"edge22\" class=\"edge\">\n<title>titanic_data->pclass</title>\n<path fill=\"none\" stroke=\"black\" d=\"M192.11,-526.61C192.59,-454.64 201.79,-277.4 275.45,-154.8 278.29,-150.08 281.87,-145.65 285.81,-141.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"287.79,-144.59 292.72,-135.22 283.02,-139.47 287.79,-144.59\"/>\n</g>\n<!-- ticket -->\n<g id=\"node18\" 
class=\"node\">\n<title>ticket</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-1000.6C343.92,-1000.6 308.07,-1000.6 308.07,-1000.6 302.07,-1000.6 296.07,-994.6 296.07,-988.6 296.07,-988.6 296.07,-949 296.07,-949 296.07,-943 302.07,-937 308.07,-937 308.07,-937 343.92,-937 343.92,-937 349.92,-937 355.92,-943 355.92,-949 355.92,-949 355.92,-988.6 355.92,-988.6 355.92,-994.6 349.92,-1000.6 343.92,-1000.6\"/>\n<text text-anchor=\"start\" x=\"308.37\" y=\"-977.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">ticket</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-949.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->ticket -->\n<g id=\"edge32\" class=\"edge\">\n<title>titanic_data->ticket</title>\n<path fill=\"none\" stroke=\"black\" d=\"M192.93,-591C195.16,-658.47 207.81,-817.55 275.45,-927.8 278.74,-933.17 282.99,-938.16 287.61,-942.68\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.83,-944.79 294.63,-948.82 289.49,-939.56 284.83,-944.79\"/>\n</g>\n<!-- cabin -->\n<g id=\"node20\" class=\"node\">\n<title>cabin</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-426.6C343.92,-426.6 308.07,-426.6 308.07,-426.6 302.07,-426.6 296.07,-420.6 296.07,-414.6 296.07,-414.6 296.07,-375 296.07,-375 296.07,-369 302.07,-363 308.07,-363 308.07,-363 343.92,-363 343.92,-363 349.92,-363 355.92,-369 355.92,-375 355.92,-375 355.92,-414.6 355.92,-414.6 355.92,-420.6 349.92,-426.6 343.92,-426.6\"/>\n<text text-anchor=\"start\" x=\"308.37\" y=\"-403.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-375.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->cabin -->\n<g id=\"edge35\" class=\"edge\">\n<title>titanic_data->cabin</title>\n<path fill=\"none\" stroke=\"black\" 
d=\"M210.68,-526.68C226.14,-500.74 249.85,-464.04 275.45,-435.8 279.15,-431.72 283.27,-427.68 287.5,-423.83\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"289.46,-425.9 294.71,-416.69 284.87,-420.62 289.46,-425.9\"/>\n</g>\n<!-- fare -->\n<g id=\"node24\" class=\"node\">\n<title>fare</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-590.6C343.92,-590.6 308.07,-590.6 308.07,-590.6 302.07,-590.6 296.07,-584.6 296.07,-578.6 296.07,-578.6 296.07,-539 296.07,-539 296.07,-533 302.07,-527 308.07,-527 308.07,-527 343.92,-527 343.92,-527 349.92,-527 355.92,-533 355.92,-539 355.92,-539 355.92,-578.6 355.92,-578.6 355.92,-584.6 349.92,-590.6 343.92,-590.6\"/>\n<text text-anchor=\"start\" x=\"313.62\" y=\"-567.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fare</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-539.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->fare -->\n<g id=\"edge40\" class=\"edge\">\n<title>titanic_data->fare</title>\n<path fill=\"none\" stroke=\"black\" d=\"M239.98,-558.8C254.62,-558.8 270.6,-558.8 284.72,-558.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.58,-562.3 294.58,-558.8 284.58,-555.3 284.58,-562.3\"/>\n</g>\n<!-- survived -->\n<g id=\"node25\" class=\"node\">\n<title>survived</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M352.55,-918.6C352.55,-918.6 299.45,-918.6 299.45,-918.6 293.45,-918.6 287.45,-912.6 287.45,-906.6 287.45,-906.6 287.45,-867 287.45,-867 287.45,-861 293.45,-855 299.45,-855 299.45,-855 352.55,-855 352.55,-855 358.55,-855 364.55,-861 364.55,-867 364.55,-867 364.55,-906.6 364.55,-906.6 364.55,-912.6 358.55,-918.6 352.55,-918.6\"/>\n<text text-anchor=\"start\" x=\"298.25\" y=\"-895.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">survived</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-867.5\" font-family=\"Helvetica,sans-Serif\" 
font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->survived -->\n<g id=\"edge41\" class=\"edge\">\n<title>titanic_data->survived</title>\n<path fill=\"none\" stroke=\"black\" d=\"M195.6,-591.09C202.22,-647.08 221.84,-764.05 275.45,-845.8 276.9,-848.01 278.5,-850.15 280.21,-852.24\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"276.95,-853.95 286.31,-858.92 282.1,-849.2 276.95,-853.95\"/>\n</g>\n<!-- passengerid -->\n<g id=\"node27\" class=\"node\">\n<title>passengerid</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M364.55,-836.6C364.55,-836.6 287.45,-836.6 287.45,-836.6 281.45,-836.6 275.45,-830.6 275.45,-824.6 275.45,-824.6 275.45,-785 275.45,-785 275.45,-779 281.45,-773 287.45,-773 287.45,-773 364.55,-773 364.55,-773 370.55,-773 376.55,-779 376.55,-785 376.55,-785 376.55,-824.6 376.55,-824.6 376.55,-830.6 370.55,-836.6 364.55,-836.6\"/>\n<text text-anchor=\"start\" x=\"286.25\" y=\"-813.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">passengerid</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-785.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->passengerid -->\n<g id=\"edge44\" class=\"edge\">\n<title>titanic_data->passengerid</title>\n<path fill=\"none\" stroke=\"black\" d=\"M200.16,-590.74C211.45,-633.26 235.65,-709.44 275.45,-763.8 275.59,-763.99 275.73,-764.18 275.87,-764.37\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"273.77,-766.31 282.9,-771.69 279.12,-761.79 273.77,-766.31\"/>\n</g>\n<!-- embarked -->\n<g id=\"node28\" class=\"node\">\n<title>embarked</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M357.05,-328.6C357.05,-328.6 294.95,-328.6 294.95,-328.6 288.95,-328.6 282.95,-322.6 282.95,-316.6 282.95,-316.6 282.95,-277 282.95,-277 282.95,-271 288.95,-265 294.95,-265 294.95,-265 357.05,-265 357.05,-265 363.05,-265 369.05,-271 369.05,-277 369.05,-277 369.05,-316.6 369.05,-316.6 
369.05,-322.6 363.05,-328.6 357.05,-328.6\"/>\n<text text-anchor=\"start\" x=\"293.75\" y=\"-305.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-277.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->embarked -->\n<g id=\"edge45\" class=\"edge\">\n<title>titanic_data->embarked</title>\n<path fill=\"none\" stroke=\"black\" d=\"M201.85,-526.7C214.77,-484.99 240.53,-410.75 275.45,-353.8 278.91,-348.15 282.94,-342.51 287.17,-337.1\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"290.33,-339.77 293.94,-329.81 284.91,-335.34 290.33,-339.77\"/>\n</g>\n<!-- age -->\n<g id=\"node30\" class=\"node\">\n<title>age</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-508.6C343.92,-508.6 308.07,-508.6 308.07,-508.6 302.07,-508.6 296.07,-502.6 296.07,-496.6 296.07,-496.6 296.07,-457 296.07,-457 296.07,-451 302.07,-445 308.07,-445 308.07,-445 343.92,-445 343.92,-445 349.92,-445 355.92,-451 355.92,-457 355.92,-457 355.92,-496.6 355.92,-496.6 355.92,-502.6 349.92,-508.6 343.92,-508.6\"/>\n<text text-anchor=\"start\" x=\"314.37\" y=\"-485.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">age</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-457.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->age -->\n<g id=\"edge47\" class=\"edge\">\n<title>titanic_data->age</title>\n<path fill=\"none\" stroke=\"black\" d=\"M239.98,-529.43C255.2,-520 271.87,-509.69 286.39,-500.7\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"287.91,-503.25 294.58,-495.01 284.23,-497.3 287.91,-503.25\"/>\n</g>\n<!-- sex -->\n<g id=\"node31\" class=\"node\">\n<title>sex</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-227.6C343.92,-227.6 308.07,-227.6 308.07,-227.6 302.07,-227.6 296.07,-221.6 296.07,-215.6 
296.07,-215.6 296.07,-176 296.07,-176 296.07,-170 302.07,-164 308.07,-164 308.07,-164 343.92,-164 343.92,-164 349.92,-164 355.92,-170 355.92,-176 355.92,-176 355.92,-215.6 355.92,-215.6 355.92,-221.6 349.92,-227.6 343.92,-227.6\"/>\n<text text-anchor=\"start\" x=\"314.75\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->sex -->\n<g id=\"edge48\" class=\"edge\">\n<title>titanic_data->sex</title>\n<path fill=\"none\" stroke=\"black\" d=\"M196.77,-526.65C205.28,-469.49 227.84,-347.53 275.45,-255.8 279.26,-248.46 284.07,-241.21 289.2,-234.41\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"292.34,-237.09 295.81,-227.08 286.85,-232.74 292.34,-237.09\"/>\n</g>\n<!-- sibsp -->\n<g id=\"node32\" class=\"node\">\n<title>sibsp</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-754.6C343.92,-754.6 308.07,-754.6 308.07,-754.6 302.07,-754.6 296.07,-748.6 296.07,-742.6 296.07,-742.6 296.07,-703 296.07,-703 296.07,-697 302.07,-691 308.07,-691 308.07,-691 343.92,-691 343.92,-691 349.92,-691 355.92,-697 355.92,-703 355.92,-703 355.92,-742.6 355.92,-742.6 355.92,-748.6 349.92,-754.6 343.92,-754.6\"/>\n<text text-anchor=\"start\" x=\"308.37\" y=\"-731.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sibsp</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-703.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->sibsp -->\n<g id=\"edge49\" class=\"edge\">\n<title>titanic_data->sibsp</title>\n<path fill=\"none\" stroke=\"black\" d=\"M210.68,-590.92C226.14,-616.86 249.85,-653.56 275.45,-681.8 279.15,-685.88 283.27,-689.92 287.5,-693.77\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.87,-696.98 294.71,-700.91 289.46,-691.7 
284.87,-696.98\"/>\n</g>\n<!-- embarked_encoder -->\n<g id=\"node5\" class=\"node\">\n<title>embarked_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M539.65,-328.6C539.65,-328.6 417.55,-328.6 417.55,-328.6 411.55,-328.6 405.55,-322.6 405.55,-316.6 405.55,-316.6 405.55,-277 405.55,-277 405.55,-271 411.55,-265 417.55,-265 417.55,-265 539.65,-265 539.65,-265 545.65,-265 551.65,-271 551.65,-277 551.65,-277 551.65,-316.6 551.65,-316.6 551.65,-322.6 545.65,-328.6 539.65,-328.6\"/>\n<text text-anchor=\"start\" x=\"416.35\" y=\"-305.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_encoder</text>\n<text text-anchor=\"start\" x=\"436.97\" y=\"-277.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- embarked_encoder->embarked_category -->\n<g id=\"edge2\" class=\"edge\">\n<title>embarked_encoder->embarked_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M552.03,-276.52C557.85,-274.89 563.76,-273.23 569.65,-271.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"570.53,-274.69 579.22,-268.62 568.65,-267.95 570.53,-274.69\"/>\n</g>\n<!-- encoders -->\n<g id=\"node9\" class=\"node\">\n<title>encoders</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M684.37,-361.6C684.37,-361.6 626.77,-361.6 626.77,-361.6 620.77,-361.6 614.77,-355.6 614.77,-349.6 614.77,-349.6 614.77,-310 614.77,-310 614.77,-304 620.77,-298 626.77,-298 626.77,-298 684.37,-298 684.37,-298 690.37,-298 696.37,-304 696.37,-310 696.37,-310 696.37,-349.6 696.37,-349.6 696.37,-355.6 690.37,-361.6 684.37,-361.6\"/>\n<text text-anchor=\"start\" x=\"625.57\" y=\"-338.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">encoders</text>\n<text text-anchor=\"start\" x=\"645.07\" y=\"-310.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- embarked_encoder->encoders -->\n<g id=\"edge13\" 
class=\"edge\">\n<title>embarked_encoder->encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M552.03,-310.46C569.62,-313.78 588.04,-317.25 604.34,-320.33\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"603.24,-323.87 613.71,-322.28 604.53,-316.99 603.24,-323.87\"/>\n</g>\n<!-- family -->\n<g id=\"node19\" class=\"node\">\n<title>family</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M497.27,-661.6C497.27,-661.6 459.92,-661.6 459.92,-661.6 453.92,-661.6 447.92,-655.6 447.92,-649.6 447.92,-649.6 447.92,-610 447.92,-610 447.92,-604 453.92,-598 459.92,-598 459.92,-598 497.27,-598 497.27,-598 503.27,-598 509.27,-604 509.27,-610 509.27,-610 509.27,-649.6 509.27,-649.6 509.27,-655.6 503.27,-661.6 497.27,-661.6\"/>\n<text text-anchor=\"start\" x=\"458.72\" y=\"-638.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">family</text>\n<text text-anchor=\"start\" x=\"459.47\" y=\"-610.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- parch->family -->\n<g id=\"edge34\" class=\"edge\">\n<title>parch->family</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-638.65C379.29,-636.98 411.33,-634.64 436.72,-632.79\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"436.79,-636.22 446.51,-632 436.28,-629.24 436.79,-636.22\"/>\n</g>\n<!-- X_train -->\n<g id=\"node7\" class=\"node\">\n<title>X_train</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1323.67,-484.6C1323.67,-484.6 1257.83,-484.6 1257.83,-484.6 1251.83,-484.6 1245.83,-478.6 1245.83,-472.6 1245.83,-472.6 1245.83,-433 1245.83,-433 1245.83,-427 1251.83,-421 1257.83,-421 1257.83,-421 1323.67,-421 1323.67,-421 1329.67,-421 1335.67,-427 1335.67,-433 1335.67,-433 1335.67,-472.6 1335.67,-472.6 1335.67,-478.6 1329.67,-484.6 1323.67,-484.6\"/>\n<text text-anchor=\"start\" x=\"1267.88\" y=\"-461.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_train</text>\n<text 
text-anchor=\"start\" x=\"1256.62\" y=\"-433.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- fit_random_forest -->\n<g id=\"node11\" class=\"node\">\n<title>fit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1523.9,-402.6C1523.9,-402.6 1410.8,-402.6 1410.8,-402.6 1404.8,-402.6 1398.8,-396.6 1398.8,-390.6 1398.8,-390.6 1398.8,-351 1398.8,-351 1398.8,-345 1404.8,-339 1410.8,-339 1410.8,-339 1523.9,-339 1523.9,-339 1529.9,-339 1535.9,-345 1535.9,-351 1535.9,-351 1535.9,-390.6 1535.9,-390.6 1535.9,-396.6 1529.9,-402.6 1523.9,-402.6\"/>\n<text text-anchor=\"start\" x=\"1409.6\" y=\"-379.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_random_forest</text>\n<text text-anchor=\"start\" x=\"1423.47\" y=\"-351.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- X_train->fit_random_forest -->\n<g id=\"edge18\" class=\"edge\">\n<title>X_train->fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1335.81,-432.11C1351.91,-424.55 1370.63,-415.75 1388.79,-407.23\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1389.94,-410.08 1397.5,-402.66 1386.96,-403.75 1389.94,-410.08\"/>\n</g>\n<!-- X_test -->\n<g id=\"node8\" class=\"node\">\n<title>X_test</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1323.67,-238.6C1323.67,-238.6 1257.83,-238.6 1257.83,-238.6 1251.83,-238.6 1245.83,-232.6 1245.83,-226.6 1245.83,-226.6 1245.83,-187 1245.83,-187 1245.83,-181 1251.83,-175 1257.83,-175 1257.83,-175 1323.67,-175 1323.67,-175 1329.67,-175 1335.67,-181 1335.67,-187 1335.67,-187 1335.67,-226.6 1335.67,-226.6 1335.67,-232.6 1329.67,-238.6 1323.67,-238.6\"/>\n<text text-anchor=\"start\" x=\"1270.5\" y=\"-215.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_test</text>\n<text text-anchor=\"start\" x=\"1256.62\" y=\"-187.5\" 
font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- train_test_split_func->X_train -->\n<g id=\"edge9\" class=\"edge\">\n<title>train_test_split_func->X_train</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1133.62,-362.06C1156.77,-377.63 1185.34,-396.27 1211.7,-411.8 1219.47,-416.38 1227.8,-421.02 1236.02,-425.46\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1234.11,-428.94 1244.58,-430.56 1237.4,-422.76 1234.11,-428.94\"/>\n</g>\n<!-- train_test_split_func->X_test -->\n<g id=\"edge10\" class=\"edge\">\n<title>train_test_split_func->X_test</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1133.62,-297.54C1156.77,-281.97 1185.34,-263.33 1211.7,-247.8 1219.47,-243.22 1227.8,-238.58 1236.02,-234.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1237.4,-236.84 1244.58,-229.04 1234.11,-230.66 1237.4,-236.84\"/>\n</g>\n<!-- y_train -->\n<g id=\"node12\" class=\"node\">\n<title>y_train</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1311.67,-402.6C1311.67,-402.6 1269.83,-402.6 1269.83,-402.6 1263.83,-402.6 1257.83,-396.6 1257.83,-390.6 1257.83,-390.6 1257.83,-351 1257.83,-351 1257.83,-345 1263.83,-339 1269.83,-339 1269.83,-339 1311.67,-339 1311.67,-339 1317.67,-339 1323.67,-345 1323.67,-351 1323.67,-351 1323.67,-390.6 1323.67,-390.6 1323.67,-396.6 1317.67,-402.6 1311.67,-402.6\"/>\n<text text-anchor=\"start\" x=\"1268.62\" y=\"-379.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_train</text>\n<text text-anchor=\"start\" x=\"1271.62\" y=\"-351.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func->y_train -->\n<g id=\"edge20\" class=\"edge\">\n<title>train_test_split_func->y_train</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1163.01,-345.18C1191.38,-350.92 1222.57,-357.22 1246.91,-362.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1246.09,-365.75 1256.58,-364.3 
1247.47,-358.89 1246.09,-365.75\"/>\n</g>\n<!-- y_test -->\n<g id=\"node29\" class=\"node\">\n<title>y_test</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1309.05,-320.6C1309.05,-320.6 1272.45,-320.6 1272.45,-320.6 1266.45,-320.6 1260.45,-314.6 1260.45,-308.6 1260.45,-308.6 1260.45,-269 1260.45,-269 1260.45,-263 1266.45,-257 1272.45,-257 1272.45,-257 1309.05,-257 1309.05,-257 1315.05,-257 1321.05,-263 1321.05,-269 1321.05,-269 1321.05,-308.6 1321.05,-308.6 1321.05,-314.6 1315.05,-320.6 1309.05,-320.6\"/>\n<text text-anchor=\"start\" x=\"1271.25\" y=\"-297.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_test</text>\n<text text-anchor=\"start\" x=\"1271.62\" y=\"-269.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func->y_test -->\n<g id=\"edge46\" class=\"edge\">\n<title>train_test_split_func->y_test</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1163.01,-314.42C1192.36,-308.49 1224.72,-301.94 1249.4,-296.96\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1250.05,-300.19 1259.16,-294.78 1248.66,-293.33 1250.05,-300.19\"/>\n</g>\n<!-- y_train->fit_random_forest -->\n<g id=\"edge19\" class=\"edge\">\n<title>y_train->fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1323.77,-370.8C1341.71,-370.8 1364.93,-370.8 1387.41,-370.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1387.36,-374.3 1397.36,-370.8 1387.36,-367.3 1387.36,-374.3\"/>\n</g>\n<!-- pclass->training_set_v1 -->\n<g id=\"edge25\" class=\"edge\">\n<title>pclass->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M358.14,-102.99C432.64,-79.3 622.89,-32.02 730.5,-124.8 786.56,-173.13 713.12,-231.11 759.5,-288.8 767.4,-298.62 778.04,-306.12 789.44,-311.83\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"787.59,-315.28 798.14,-316.22 790.47,-308.91 787.59,-315.28\"/>\n</g>\n<!-- ticket_t -->\n<g id=\"node15\" 
class=\"node\">\n<title>ticket_t</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M501.02,-1000.6C501.02,-1000.6 456.17,-1000.6 456.17,-1000.6 450.17,-1000.6 444.17,-994.6 444.17,-988.6 444.17,-988.6 444.17,-949 444.17,-949 444.17,-943 450.17,-937 456.17,-937 456.17,-937 501.02,-937 501.02,-937 507.02,-937 513.02,-943 513.02,-949 513.02,-949 513.02,-988.6 513.02,-988.6 513.02,-994.6 507.02,-1000.6 501.02,-1000.6\"/>\n<text text-anchor=\"start\" x=\"454.97\" y=\"-977.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">ticket_t</text>\n<text text-anchor=\"start\" x=\"459.47\" y=\"-949.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- sex_encoder -->\n<g id=\"node16\" class=\"node\">\n<title>sex_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M519.02,-208.6C519.02,-208.6 438.17,-208.6 438.17,-208.6 432.17,-208.6 426.17,-202.6 426.17,-196.6 426.17,-196.6 426.17,-157 426.17,-157 426.17,-151 432.17,-145 438.17,-145 438.17,-145 519.02,-145 519.02,-145 525.02,-145 531.02,-151 531.02,-157 531.02,-157 531.02,-196.6 531.02,-196.6 531.02,-202.6 525.02,-208.6 519.02,-208.6\"/>\n<text text-anchor=\"start\" x=\"437.35\" y=\"-185.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_encoder</text>\n<text text-anchor=\"start\" x=\"436.97\" y=\"-157.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- sex_encoder->encoders -->\n<g id=\"edge11\" class=\"edge\">\n<title>sex_encoder->encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M509.18,-208.96C522.26,-223.25 537.78,-240.31 551.65,-255.8 564.67,-270.35 565.44,-276.55 580.65,-288.8 588.12,-294.82 596.6,-300.44 605.09,-305.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"603.19,-308.99 613.61,-310.89 606.65,-302.9 603.19,-308.99\"/>\n</g>\n<!-- sex_category -->\n<g id=\"node22\" 
class=\"node\">\n<title>sex_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M697.5,-197.6C697.5,-197.6 613.65,-197.6 613.65,-197.6 607.65,-197.6 601.65,-191.6 601.65,-185.6 601.65,-185.6 601.65,-146 601.65,-146 601.65,-140 607.65,-134 613.65,-134 613.65,-134 697.5,-134 697.5,-134 703.5,-134 709.5,-140 709.5,-146 709.5,-146 709.5,-185.6 709.5,-185.6 709.5,-191.6 703.5,-197.6 697.5,-197.6\"/>\n<text text-anchor=\"start\" x=\"612.45\" y=\"-174.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_category</text>\n<text text-anchor=\"start\" x=\"636.45\" y=\"-146.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- sex_encoder->sex_category -->\n<g id=\"edge38\" class=\"edge\">\n<title>sex_encoder->sex_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M531.15,-173.56C549.69,-172.39 570.84,-171.06 590.33,-169.84\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"590.51,-173.27 600.27,-169.15 590.07,-166.29 590.51,-173.27\"/>\n</g>\n<!-- training_set_v1->train_test_split_func -->\n<g id=\"edge14\" class=\"edge\">\n<title>training_set_v1->train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M920.56,-329.8C944.27,-329.8 972.01,-329.8 997.88,-329.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"997.83,-333.3 1007.83,-329.8 997.83,-326.3 997.83,-333.3\"/>\n</g>\n<!-- ticket->ticket_t -->\n<g id=\"edge23\" class=\"edge\">\n<title>ticket->ticket_t</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-968.8C378.14,-968.8 408.16,-968.8 432.87,-968.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"432.81,-972.3 442.81,-968.8 432.81,-965.3 432.81,-972.3\"/>\n</g>\n<!-- family->training_set_v1 -->\n<g id=\"edge31\" class=\"edge\">\n<title>family->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M509.52,-620.42C545.28,-608.59 606.36,-586.42 654.57,-558.8\"/>\n</g>\n<!-- cabin->cabin_t -->\n<g id=\"edge3\" 
class=\"edge\">\n<title>cabin->cabin_t</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.13,-417.04C370.78,-428.04 388.92,-441.37 405.55,-452.8 414.91,-459.23 425.12,-465.93 434.82,-472.16\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"432.62,-475.54 442.93,-477.97 436.38,-469.64 432.62,-475.54\"/>\n</g>\n<!-- cabin_encoder -->\n<g id=\"node23\" class=\"node\">\n<title>cabin_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M525.02,-410.6C525.02,-410.6 432.17,-410.6 432.17,-410.6 426.17,-410.6 420.17,-404.6 420.17,-398.6 420.17,-398.6 420.17,-359 420.17,-359 420.17,-353 426.17,-347 432.17,-347 432.17,-347 525.02,-347 525.02,-347 531.02,-347 537.02,-353 537.02,-359 537.02,-359 537.02,-398.6 537.02,-398.6 537.02,-404.6 531.02,-410.6 525.02,-410.6\"/>\n<text text-anchor=\"start\" x=\"430.97\" y=\"-387.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_encoder</text>\n<text text-anchor=\"start\" x=\"436.97\" y=\"-359.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- cabin->cabin_encoder -->\n<g id=\"edge39\" class=\"edge\">\n<title>cabin->cabin_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-391.68C371.49,-390.07 390.57,-388.05 409.1,-386.08\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"409.34,-389.47 418.91,-384.93 408.6,-382.51 409.34,-389.47\"/>\n</g>\n<!-- cabin_category -->\n<g id=\"node26\" class=\"node\">\n<title>cabin_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M703.87,-443.6C703.87,-443.6 607.27,-443.6 607.27,-443.6 601.27,-443.6 595.27,-437.6 595.27,-431.6 595.27,-431.6 595.27,-392 595.27,-392 595.27,-386 601.27,-380 607.27,-380 607.27,-380 703.87,-380 703.87,-380 709.87,-380 715.87,-386 715.87,-392 715.87,-392 715.87,-431.6 715.87,-431.6 715.87,-437.6 709.87,-443.6 703.87,-443.6\"/>\n<text text-anchor=\"start\" x=\"606.07\" y=\"-420.5\" font-family=\"Helvetica,sans-Serif\" 
font-weight=\"bold\" font-size=\"14.00\">cabin_category</text>\n<text text-anchor=\"start\" x=\"636.45\" y=\"-392.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- cabin->cabin_category -->\n<g id=\"edge42\" class=\"edge\">\n<title>cabin->cabin_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.32,-406.5C370.87,-411.69 388.86,-417.21 405.55,-419.8 465.28,-429.08 534.01,-425.84 584.11,-420.99\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"584.23,-424.39 593.82,-419.9 583.52,-417.43 584.23,-424.39\"/>\n</g>\n<!-- prefit_random_forest -->\n<g id=\"node21\" class=\"node\">\n<title>prefit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1357.8,-156.6C1357.8,-156.6 1223.7,-156.6 1223.7,-156.6 1217.7,-156.6 1211.7,-150.6 1211.7,-144.6 1211.7,-144.6 1211.7,-105 1211.7,-105 1211.7,-99 1217.7,-93 1223.7,-93 1223.7,-93 1357.8,-93 1357.8,-93 1363.8,-93 1369.8,-99 1369.8,-105 1369.8,-105 1369.8,-144.6 1369.8,-144.6 1369.8,-150.6 1363.8,-156.6 1357.8,-156.6\"/>\n<text text-anchor=\"start\" x=\"1222.5\" y=\"-133.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">prefit_random_forest</text>\n<text text-anchor=\"start\" x=\"1246.88\" y=\"-105.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- prefit_random_forest->fit_random_forest -->\n<g id=\"edge17\" class=\"edge\">\n<title>prefit_random_forest->fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1360.15,-157.07C1363.59,-159.8 1366.84,-162.7 1369.8,-165.8 1414.38,-212.36 1441.45,-283.31 1455.16,-328.35\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1452.01,-329.05 1458.2,-337.65 1458.72,-327.07 1452.01,-329.05\"/>\n</g>\n<!-- sex_category->training_set_v1 -->\n<g id=\"edge29\" class=\"edge\">\n<title>sex_category->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M709.84,-189.03C717.49,-194.06 
724.72,-199.97 730.5,-206.8 755.47,-236.31 732.88,-260.77 759.5,-288.8 767.84,-297.59 778.35,-304.55 789.38,-310.06\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"787.62,-313.56 798.17,-314.53 790.53,-307.19 787.62,-313.56\"/>\n</g>\n<!-- cabin_encoder->encoders -->\n<g id=\"edge12\" class=\"edge\">\n<title>cabin_encoder->encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M537.38,-362.62C558.82,-356.61 582.97,-349.85 603.75,-344.03\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"604.59,-347.15 613.28,-341.08 602.71,-340.41 604.59,-347.15\"/>\n</g>\n<!-- cabin_encoder->cabin_category -->\n<g id=\"edge43\" class=\"edge\">\n<title>cabin_encoder->cabin_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M537.38,-389.7C552.42,-392.53 568.8,-395.62 584.4,-398.57\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"583.58,-402.16 594.06,-400.58 584.88,-395.28 583.58,-402.16\"/>\n</g>\n<!-- fare->training_set_v1 -->\n<g id=\"edge27\" class=\"edge\">\n<title>fare->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.3,-556.54C386.76,-554.72 435.74,-553.27 477.6,-558.8\"/>\n</g>\n<!-- cabin_category->training_set_v1 -->\n<g id=\"edge28\" class=\"edge\">\n<title>cabin_category->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M716.16,-387.64C739.09,-378.34 765.48,-367.64 789.28,-357.99\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"790.34,-360.93 798.3,-353.93 787.71,-354.44 790.34,-360.93\"/>\n</g>\n<!-- embarked->embarked_category -->\n<g id=\"edge1\" class=\"edge\">\n<title>embarked->embarked_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M369.21,-270.37C380.6,-264.49 393.2,-259.04 405.55,-255.8 458.84,-241.82 520.75,-239.77 569.5,-241.24\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"569.21,-244.77 579.33,-241.63 569.46,-237.77 569.21,-244.77\"/>\n</g>\n<!-- embarked->embarked_encoder -->\n<g id=\"edge7\" class=\"edge\">\n<title>embarked->embarked_encoder</title>\n<path 
fill=\"none\" stroke=\"black\" d=\"M369.35,-296.8C377.18,-296.8 385.6,-296.8 394.17,-296.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"394.06,-300.3 404.06,-296.8 394.06,-293.3 394.06,-300.3\"/>\n</g>\n<!-- age->training_set_v1 -->\n<g id=\"edge26\" class=\"edge\">\n<title>age->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.03,-500.55C377.21,-517.8 402.68,-538.35 405.55,-539.8 435.12,-554.71 444.77,-554.47 477.6,-558.8\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M479.6,-558.8C556.7,-568.98 587.1,-597.46 654.57,-558.8\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M656.57,-558.8C736.68,-512.91 801.34,-423.57 834.52,-371.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"837.84,-373.37 840.16,-363.03 831.9,-369.66 837.84,-373.37\"/>\n</g>\n<!-- sex->sex_encoder -->\n<g id=\"edge24\" class=\"edge\">\n<title>sex->sex_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-192.09C373.08,-189.99 394.64,-187.27 414.93,-184.71\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"415.23,-188.07 424.71,-183.35 414.35,-181.13 415.23,-188.07\"/>\n</g>\n<!-- sex->sex_category -->\n<g id=\"edge37\" class=\"edge\">\n<title>sex->sex_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.42,-171.86C376.24,-156 399.78,-137.75 405.55,-135.8 466.33,-115.24 539.64,-128.29 590.93,-143.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"589.62,-146.63 600.2,-146.13 591.62,-139.92 589.62,-146.63\"/>\n</g>\n<!-- sibsp->family -->\n<g id=\"edge33\" class=\"edge\">\n<title>sibsp->family</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-704.65C379.8,-690.19 412.77,-669.83 438.42,-653.99\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"439.84,-656.62 446.51,-648.38 436.16,-650.66 439.84,-656.62\"/>\n</g>\n<!-- _target_inputs -->\n<g id=\"node33\" class=\"node\">\n<title>_target_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"246.45,-1073.1 136.6,-1073.1 136.6,-1028.5 
246.45,-1028.5 246.45,-1073.1\"/>\n<text text-anchor=\"start\" x=\"151.4\" y=\"-1045\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">target_col</text>\n<text text-anchor=\"start\" x=\"216.65\" y=\"-1045\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _target_inputs->target -->\n<g id=\"edge5\" class=\"edge\">\n<title>_target_inputs->target</title>\n<path fill=\"none\" stroke=\"black\" d=\"M246.79,-1050.8C259.46,-1050.8 272.74,-1050.8 284.68,-1050.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.65,-1054.3 294.65,-1050.8 284.65,-1047.3 284.65,-1054.3\"/>\n</g>\n<!-- _titanic_data_inputs -->\n<g id=\"node34\" class=\"node\">\n<title>_titanic_data_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"107.6,-591.6 0,-591.6 0,-526 107.6,-526 107.6,-591.6\"/>\n<text text-anchor=\"start\" x=\"20.05\" y=\"-563.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">location</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-563.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n<text text-anchor=\"start\" x=\"14.8\" y=\"-542.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">index_col</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-542.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _titanic_data_inputs->titanic_data -->\n<g id=\"edge6\" class=\"edge\">\n<title>_titanic_data_inputs->titanic_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M108.05,-558.8C116.02,-558.8 124.26,-558.8 132.33,-558.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"132.16,-562.3 142.16,-558.8 132.16,-555.3 132.16,-562.3\"/>\n</g>\n<!-- _train_test_split_func_inputs -->\n<g id=\"node35\" class=\"node\">\n<title>_train_test_split_func_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"960.1,-279.6 759.5,-279.6 759.5,-214 960.1,-214 960.1,-279.6\"/>\n<text text-anchor=\"start\" 
x=\"802.67\" y=\"-251.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"925.67\" y=\"-251.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n<text text-anchor=\"start\" x=\"774.17\" y=\"-230.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">validation_size_fraction</text>\n<text text-anchor=\"start\" x=\"920.05\" y=\"-230.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">float</text>\n</g>\n<!-- _train_test_split_func_inputs->train_test_split_func -->\n<g id=\"edge16\" class=\"edge\">\n<title>_train_test_split_func_inputs->train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M950.66,-280.08C966.44,-285.93 982.84,-292 998.52,-297.81\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"997.12,-301.39 1007.71,-301.58 999.55,-294.83 997.12,-301.39\"/>\n</g>\n<!-- _prefit_random_forest_inputs -->\n<g id=\"node36\" class=\"node\">\n<title>_prefit_random_forest_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1182.7,-157.6 989.1,-157.6 989.1,-92 1182.7,-92 1182.7,-157.6\"/>\n<text text-anchor=\"start\" x=\"1003.77\" y=\"-129.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"1123.27\" y=\"-129.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n<text text-anchor=\"start\" x=\"1011.65\" y=\"-108.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">max_depth</text>\n<text text-anchor=\"start\" x=\"1092.9\" y=\"-108.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">typing.Union</text>\n</g>\n<!-- _prefit_random_forest_inputs->prefit_random_forest -->\n<g id=\"edge36\" class=\"edge\">\n<title>_prefit_random_forest_inputs->prefit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1182.98,-124.8C1188.93,-124.8 1194.91,-124.8 1200.83,-124.8\"/>\n<polygon fill=\"black\" stroke=\"black\" 
points=\"1200.4,-128.3 1210.4,-124.8 1200.4,-121.3 1200.4,-128.3\"/>\n</g>\n<!-- input -->\n<g id=\"node37\" class=\"node\">\n<title>input</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"80.8,-701.1 26.8,-701.1 26.8,-664.5 80.8,-664.5 80.8,-701.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-677\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node38\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M76.22,-646.1C76.22,-646.1 31.37,-646.1 31.37,-646.1 25.37,-646.1 19.37,-640.1 19.37,-634.1 19.37,-634.1 19.37,-621.5 19.37,-621.5 19.37,-615.5 25.37,-609.5 31.37,-609.5 31.37,-609.5 76.22,-609.5 76.22,-609.5 82.22,-609.5 88.22,-615.5 88.22,-621.5 88.22,-621.5 88.22,-634.1 88.22,-634.1 88.22,-640.1 82.22,-646.1 76.22,-646.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-622\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n", |
| "text/plain": "<graphviz.graphs.Digraph at 0x13f057fa0>" |
| }, |
| "execution_count": 3, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Render the whole DAG built from the modules given to the Driver; the returned graphviz Digraph is shown inline.\n", |
| "dr.display_all_functions() # pass in a path if you want to save the image." |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:21.143368Z", |
| "start_time": "2023-11-07T23:50:20.003Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# Visualize how something is built: what sequence of operations produced this data/model?\n", |
| "E.g. how do the feature encoders get computed and what flows into them?" |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"642pt\" height=\"358pt\"\n viewBox=\"0.00 0.00 642.00 357.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 353.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-353.8 638,-353.8 638,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"11.38,-156.8 11.38,-341.8 96.22,-341.8 96.22,-156.8 11.38,-156.8\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-324.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- embarked -->\n<g id=\"node1\" class=\"node\">\n<title>embarked</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M336.3,-227.6C336.3,-227.6 274.2,-227.6 274.2,-227.6 268.2,-227.6 262.2,-221.6 262.2,-215.6 262.2,-215.6 262.2,-176 262.2,-176 262.2,-170 268.2,-164 274.2,-164 274.2,-164 336.3,-164 336.3,-164 342.3,-164 348.3,-170 348.3,-176 348.3,-176 348.3,-215.6 348.3,-215.6 348.3,-221.6 342.3,-227.6 336.3,-227.6\"/>\n<text text-anchor=\"start\" x=\"273\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked</text>\n<text text-anchor=\"start\" x=\"286.12\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- embarked_encoder -->\n<g id=\"node3\" class=\"node\">\n<title>embarked_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M511.4,-227.6C511.4,-227.6 389.3,-227.6 389.3,-227.6 383.3,-227.6 377.3,-221.6 377.3,-215.6 377.3,-215.6 377.3,-176 377.3,-176 377.3,-170 383.3,-164 389.3,-164 389.3,-164 511.4,-164 511.4,-164 517.4,-164 
523.4,-170 523.4,-176 523.4,-176 523.4,-215.6 523.4,-215.6 523.4,-221.6 517.4,-227.6 511.4,-227.6\"/>\n<text text-anchor=\"start\" x=\"388.1\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_encoder</text>\n<text text-anchor=\"start\" x=\"408.72\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- embarked->embarked_encoder -->\n<g id=\"edge3\" class=\"edge\">\n<title>embarked->embarked_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M348.8,-195.8C354.36,-195.8 360.2,-195.8 366.15,-195.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"365.95,-199.3 375.95,-195.8 365.95,-192.3 365.95,-199.3\"/>\n</g>\n<!-- titanic_data -->\n<g id=\"node2\" class=\"node\">\n<title>titanic_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M221.2,-145.6C221.2,-145.6 148.6,-145.6 148.6,-145.6 142.6,-145.6 136.6,-139.6 136.6,-133.6 136.6,-133.6 136.6,-94 136.6,-94 136.6,-88 142.6,-82 148.6,-82 148.6,-82 221.2,-82 221.2,-82 227.2,-82 233.2,-88 233.2,-94 233.2,-94 233.2,-133.6 233.2,-133.6 233.2,-139.6 227.2,-145.6 221.2,-145.6\"/>\n<text text-anchor=\"start\" x=\"147.4\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">titanic_data</text>\n<text text-anchor=\"start\" x=\"150.77\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- titanic_data->embarked -->\n<g id=\"edge1\" class=\"edge\">\n<title>titanic_data->embarked</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.34,-145.97C239.17,-150.71 246.23,-155.6 253.15,-160.4\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"250.59,-163.57 260.8,-166.39 254.58,-157.82 250.59,-163.57\"/>\n</g>\n<!-- cabin -->\n<g id=\"node5\" class=\"node\">\n<title>cabin</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M323.17,-145.6C323.17,-145.6 287.32,-145.6 287.32,-145.6 281.32,-145.6 
275.32,-139.6 275.32,-133.6 275.32,-133.6 275.32,-94 275.32,-94 275.32,-88 281.32,-82 287.32,-82 287.32,-82 323.17,-82 323.17,-82 329.17,-82 335.17,-88 335.17,-94 335.17,-94 335.17,-133.6 335.17,-133.6 335.17,-139.6 329.17,-145.6 323.17,-145.6\"/>\n<text text-anchor=\"start\" x=\"287.62\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin</text>\n<text text-anchor=\"start\" x=\"286.12\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->cabin -->\n<g id=\"edge5\" class=\"edge\">\n<title>titanic_data->cabin</title>\n<path fill=\"none\" stroke=\"black\" d=\"M233.7,-113.8C243.86,-113.8 254.48,-113.8 264.32,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"264.01,-117.3 274.01,-113.8 264.01,-110.3 264.01,-117.3\"/>\n</g>\n<!-- sex -->\n<g id=\"node6\" class=\"node\">\n<title>sex</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M323.17,-63.6C323.17,-63.6 287.32,-63.6 287.32,-63.6 281.32,-63.6 275.32,-57.6 275.32,-51.6 275.32,-51.6 275.32,-12 275.32,-12 275.32,-6 281.32,0 287.32,0 287.32,0 323.17,0 323.17,0 329.17,0 335.17,-6 335.17,-12 335.17,-12 335.17,-51.6 335.17,-51.6 335.17,-57.6 329.17,-63.6 323.17,-63.6\"/>\n<text text-anchor=\"start\" x=\"294\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex</text>\n<text text-anchor=\"start\" x=\"286.12\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->sex -->\n<g id=\"edge6\" class=\"edge\">\n<title>titanic_data->sex</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.34,-81.63C243.57,-73.84 255.44,-65.62 266.23,-58.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"267.78,-60.63 274.01,-52.06 263.79,-54.88 267.78,-60.63\"/>\n</g>\n<!-- encoders -->\n<g id=\"node8\" class=\"node\">\n<title>encoders</title>\n<path fill=\"#ffc857\" stroke=\"black\" 
d=\"M622,-145.6C622,-145.6 564.4,-145.6 564.4,-145.6 558.4,-145.6 552.4,-139.6 552.4,-133.6 552.4,-133.6 552.4,-94 552.4,-94 552.4,-88 558.4,-82 564.4,-82 564.4,-82 622,-82 622,-82 628,-82 634,-88 634,-94 634,-94 634,-133.6 634,-133.6 634,-139.6 628,-145.6 622,-145.6\"/>\n<text text-anchor=\"start\" x=\"563.2\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">encoders</text>\n<text text-anchor=\"start\" x=\"582.7\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- embarked_encoder->encoders -->\n<g id=\"edge10\" class=\"edge\">\n<title>embarked_encoder->encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M508.05,-163.68C513.25,-160.69 518.43,-157.7 523.4,-154.8 529.7,-151.13 536.3,-147.25 542.82,-143.39\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"544.45,-145.9 551.26,-137.78 540.88,-139.88 544.45,-145.9\"/>\n</g>\n<!-- cabin_encoder -->\n<g id=\"node4\" class=\"node\">\n<title>cabin_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M496.77,-145.6C496.77,-145.6 403.92,-145.6 403.92,-145.6 397.92,-145.6 391.92,-139.6 391.92,-133.6 391.92,-133.6 391.92,-94 391.92,-94 391.92,-88 397.92,-82 403.92,-82 403.92,-82 496.77,-82 496.77,-82 502.77,-82 508.77,-88 508.77,-94 508.77,-94 508.77,-133.6 508.77,-133.6 508.77,-139.6 502.77,-145.6 496.77,-145.6\"/>\n<text text-anchor=\"start\" x=\"402.72\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_encoder</text>\n<text text-anchor=\"start\" x=\"408.72\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- cabin_encoder->encoders -->\n<g id=\"edge9\" class=\"edge\">\n<title>cabin_encoder->encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M509.05,-113.8C519.65,-113.8 530.64,-113.8 541.04,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"541.03,-117.3 
551.03,-113.8 541.03,-110.3 541.03,-117.3\"/>\n</g>\n<!-- cabin->cabin_encoder -->\n<g id=\"edge4\" class=\"edge\">\n<title>cabin->cabin_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M335.55,-113.8C348.71,-113.8 364.83,-113.8 380.76,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"380.71,-117.3 390.71,-113.8 380.71,-110.3 380.71,-117.3\"/>\n</g>\n<!-- sex_encoder -->\n<g id=\"node7\" class=\"node\">\n<title>sex_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M490.77,-63.6C490.77,-63.6 409.92,-63.6 409.92,-63.6 403.92,-63.6 397.92,-57.6 397.92,-51.6 397.92,-51.6 397.92,-12 397.92,-12 397.92,-6 403.92,0 409.92,0 409.92,0 490.77,0 490.77,0 496.77,0 502.77,-6 502.77,-12 502.77,-12 502.77,-51.6 502.77,-51.6 502.77,-57.6 496.77,-63.6 490.77,-63.6\"/>\n<text text-anchor=\"start\" x=\"409.1\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_encoder</text>\n<text text-anchor=\"start\" x=\"408.72\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- sex->sex_encoder -->\n<g id=\"edge7\" class=\"edge\">\n<title>sex->sex_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M335.55,-31.8C350.32,-31.8 368.82,-31.8 386.58,-31.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"386.51,-35.3 396.51,-31.8 386.51,-28.3 386.51,-35.3\"/>\n</g>\n<!-- sex_encoder->encoders -->\n<g id=\"edge8\" class=\"edge\">\n<title>sex_encoder->encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M502.98,-61.01C509.86,-64.95 516.81,-68.96 523.4,-72.8 529.7,-76.47 536.3,-80.35 542.82,-84.21\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"540.88,-87.72 551.26,-89.82 544.45,-81.7 540.88,-87.72\"/>\n</g>\n<!-- _titanic_data_inputs -->\n<g id=\"node9\" class=\"node\">\n<title>_titanic_data_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"107.6,-146.6 0,-146.6 0,-81 107.6,-81 
107.6,-146.6\"/>\n<text text-anchor=\"start\" x=\"20.05\" y=\"-118.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">location</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-118.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n<text text-anchor=\"start\" x=\"14.8\" y=\"-97.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">index_col</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-97.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _titanic_data_inputs->titanic_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>_titanic_data_inputs->titanic_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M108.06,-113.8C113.73,-113.8 119.52,-113.8 125.25,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"125.14,-117.3 135.14,-113.8 125.14,-110.3 125.14,-117.3\"/>\n</g>\n<!-- input -->\n<g id=\"node10\" class=\"node\">\n<title>input</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"80.8,-311.1 26.8,-311.1 26.8,-274.5 80.8,-274.5 80.8,-311.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-287\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node11\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M76.22,-256.1C76.22,-256.1 31.37,-256.1 31.37,-256.1 25.37,-256.1 19.37,-250.1 19.37,-244.1 19.37,-244.1 19.37,-231.5 19.37,-231.5 19.37,-225.5 25.37,-219.5 31.37,-219.5 31.37,-219.5 76.22,-219.5 76.22,-219.5 82.22,-219.5 88.22,-225.5 88.22,-231.5 88.22,-231.5 88.22,-244.1 88.22,-244.1 88.22,-250.1 82.22,-256.1 76.22,-256.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-232\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n<!-- output -->\n<g id=\"node12\" class=\"node\">\n<title>output</title>\n<path fill=\"#ffc857\" stroke=\"black\" d=\"M71.35,-201.1C71.35,-201.1 36.25,-201.1 36.25,-201.1 30.25,-201.1 24.25,-195.1 24.25,-189.1 
24.25,-189.1 24.25,-176.5 24.25,-176.5 24.25,-170.5 30.25,-164.5 36.25,-164.5 36.25,-164.5 71.35,-164.5 71.35,-164.5 77.35,-164.5 83.35,-170.5 83.35,-176.5 83.35,-176.5 83.35,-189.1 83.35,-189.1 83.35,-195.1 77.35,-201.1 71.35,-201.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-177\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">output</text>\n</g>\n</g>\n</svg>\n", |
| "text/plain": "<graphviz.graphs.Digraph at 0x13f066ca0>" |
| }, |
| "execution_count": 4, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Runtime inputs for the DAG. Per the rendered graphs: `location`/`index_col` feed `titanic_data`,\n", |
| "# `target_col` feeds `target`, `random_state`/`validation_size_fraction` feed `train_test_split_func`,\n", |
| "# and `random_state`/`max_depth` feed `prefit_random_forest`.\n", |
| "inputs = {\n", |
| " \"location\": \"data/train.csv\",\n", |
| " \"index_col\": \"passengerid\",\n", |
| " \"target_col\": \"survived\",\n", |
| " \"random_state\": 42,\n", |
| " \"max_depth\": None,\n", |
| " \"validation_size_fraction\": 0.33,\n", |
| "}\n", |
| "# Draw only the part of the DAG needed to compute `encoders` (rendered as the highlighted output node).\n", |
| "# NOTE(review): `None` and `{}` are passed positionally -- presumably the output file path and the\n", |
| "# render kwargs; confirm against the Driver.visualize_execution signature.\n", |
| "dr.visualize_execution(\n", |
| " [features.encoders], None, {}, inputs=inputs # pass in a path if you want to save the image.\n", |
| ")" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:21.680500Z", |
| "start_time": "2023-11-07T23:50:21.012804Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# Understand the upstream lineage of a particular output: Whose/What data sources led to this artifact/model?\n", |
| "E.g. something looks off with the Random Forest model, and for the current production model\n", |
| "we want to double-check what the data sources are and who owns them,\n", |
| "so we can go ping them." |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 5, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": "[{'team': 'data-engineering',\n 'function': 'titanic_data',\n 'source': 'prod.titantic'}]" |
| }, |
| "execution_count": 5, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "upstream_nodes = dr.what_is_upstream_of(\"fit_random_forest\")\n", |
| "teams = []\n", |
| "# iterate through\n", |
| "for node in upstream_nodes:\n", |
| " # filter to nodes that we're interested in getting information about\n", |
| " if node.tags.get(\"source\"):\n", |
| " # append for output\n", |
| " teams.append({\n", |
| " \"team\": node.tags.get(\"owner\"),\n", |
| " \"function\": node.name,\n", |
| " \"source\": node.tags.get(\"source\"),\n", |
| " })\n", |
| "teams" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:21.684005Z", |
| "start_time": "2023-11-07T23:50:21.612361Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# Understand the downstream lineage of a particular output: Who/What is downstream of this transform?\n", |
| "E.g. Say we're on the data engineering team and want to change the source data. How could we determine\n", |
| "which artifacts use this data and who owns them?" |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": "[{'team': 'data-science',\n 'function': 'fit_random_forest',\n 'artifact': 'model'},\n {'team': 'data-science',\n 'function': 'training_set_v1',\n 'artifact': 'training_set'},\n {'team': 'data-science', 'function': 'encoders', 'artifact': 'encoders'}]" |
| }, |
| "execution_count": 6, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "downstream_nodes = dr.what_is_downstream_of(\"titanic_data\")\n", |
| "artifacts = []\n", |
| "for node in downstream_nodes:\n", |
| " # if it's an artifact function\n", |
| " if node.tags.get(\"artifact\"):\n", |
| " # pull out the information we want\n", |
| " artifacts.append({\n", |
| " \"team\": node.tags.get(\"owner\"),\n", |
| " \"function\": node.name,\n", |
| " \"artifact\": node.tags.get(\"artifact\"),\n", |
| " })\n", |
| "artifacts" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:21.735886Z", |
| "start_time": "2023-11-07T23:50:21.625900Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# More advanced queries: What is defined as PII data, and what does it end up in?\n", |
| "E.g. let's say our compliance team has come to us to understand how we're using PII data,\n", |
| "i.e. which artifacts does it end up in? They want this report every month." |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": "{'sex': [{'team': 'data-science',\n 'function': 'training_set_v1',\n 'artifact': 'training_set'},\n {'team': 'data-science',\n 'function': 'fit_random_forest',\n 'artifact': 'model'},\n {'team': 'data-science', 'function': 'encoders', 'artifact': 'encoders'}],\n 'age': [{'team': 'data-science',\n 'function': 'training_set_v1',\n 'artifact': 'training_set'},\n {'team': 'data-science',\n 'function': 'fit_random_forest',\n 'artifact': 'model'}]}" |
| }, |
| "execution_count": 7, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# variables whose \"PII\" tag is set to \"true\"\n", |
| "pii_nodes = [\n", |
| "    variable\n", |
| "    for variable in dr.list_available_variables()\n", |
| "    if variable.tags.get(\"PII\") == \"true\"\n", |
| "]\n", |
| "pii_to_artifacts = {}\n", |
| "for node in pii_nodes:\n", |
| "    # every PII variable gets an entry, even when no artifact is downstream of it\n", |
| "    artifact_records = []\n", |
| "    pii_to_artifacts[node.name] = artifact_records\n", |
| "    downstream_nodes = dr.what_is_downstream_of(node.name)\n", |
| "    for dwn_node in downstream_nodes:\n", |
| "        artifact_name = dwn_node.tags.get(\"artifact\")\n", |
| "        if not artifact_name:\n", |
| "            continue  # not tagged as an artifact, so not part of the report\n", |
| "        artifact_records.append({\n", |
| "            \"team\": dwn_node.tags.get(\"owner\"),\n", |
| "            \"function\": dwn_node.name,\n", |
| "            \"artifact\": artifact_name,\n", |
| "        })\n", |
| "pii_to_artifacts" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:21.871125Z", |
| "start_time": "2023-11-07T23:50:21.704409Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "markdown", |
| "source": [ |
| "# Understanding how two things connect more deeply: What is in between this data source and this model?\n", |
| "E.g. say we're new to the team and want to understand how the data flows from the data source to\n", |
| "the model. We can use the `visualize_path_between` and `what_is_the_path_between` functions to understand the lineage between two nodes." |
| ], |
| "metadata": { |
| "collapsed": false |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 14, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"1051pt\" height=\"646pt\"\n viewBox=\"0.00 0.00 1050.98 645.60\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 641.6)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-641.6 1046.98,-641.6 1046.98,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"8,-442.8 8,-572.8 92.85,-572.8 92.85,-442.8 8,-442.8\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-555.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- embarked_category -->\n<g id=\"node1\" class=\"node\">\n<title>embarked_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M265.58,-555.6C265.58,-555.6 139.73,-555.6 139.73,-555.6 133.73,-555.6 127.73,-549.6 127.73,-543.6 127.73,-543.6 127.73,-504 127.73,-504 127.73,-498 133.73,-492 139.73,-492 139.73,-492 265.58,-492 265.58,-492 271.58,-492 277.58,-498 277.58,-504 277.58,-504 277.58,-543.6 277.58,-543.6 277.58,-549.6 271.58,-555.6 265.58,-555.6\"/>\n<text text-anchor=\"start\" x=\"138.53\" y=\"-532.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_category</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-504.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- training_set_v1 -->\n<g id=\"node3\" class=\"node\">\n<title>training_set_v1</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M455.18,-350.6C455.18,-350.6 358.58,-350.6 358.58,-350.6 352.58,-350.6 346.58,-344.6 346.58,-338.6 346.58,-338.6 346.58,-299 346.58,-299 346.58,-293 
352.58,-287 358.58,-287 358.58,-287 455.18,-287 455.18,-287 461.18,-287 467.18,-293 467.18,-299 467.18,-299 467.18,-338.6 467.18,-338.6 467.18,-344.6 461.18,-350.6 455.18,-350.6\"/>\n<text text-anchor=\"start\" x=\"357.38\" y=\"-327.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_set_v1</text>\n<text text-anchor=\"start\" x=\"372.75\" y=\"-299.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- embarked_category->training_set_v1 -->\n<g id=\"edge9\" class=\"edge\">\n<title>embarked_category->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M266.19,-491.57C270.17,-488.78 274,-485.85 277.58,-482.8 319.47,-447.03 356.95,-395.74 380.42,-360.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"383.86,-362.2 386.37,-351.91 377.99,-358.39 383.86,-362.2\"/>\n</g>\n<!-- train_test_split_func -->\n<g id=\"node2\" class=\"node\">\n<title>train_test_split_func</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M677.78,-350.6C677.78,-350.6 548.18,-350.6 548.18,-350.6 542.18,-350.6 536.18,-344.6 536.18,-338.6 536.18,-338.6 536.18,-299 536.18,-299 536.18,-293 542.18,-287 548.18,-287 548.18,-287 677.78,-287 677.78,-287 683.78,-287 689.78,-293 689.78,-299 689.78,-299 689.78,-338.6 689.78,-338.6 689.78,-344.6 683.78,-350.6 677.78,-350.6\"/>\n<text text-anchor=\"start\" x=\"546.98\" y=\"-327.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">train_test_split_func</text>\n<text text-anchor=\"start\" x=\"602.48\" y=\"-299.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- X_train -->\n<g id=\"node10\" class=\"node\">\n<title>X_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M830.75,-391.6C830.75,-391.6 764.9,-391.6 764.9,-391.6 758.9,-391.6 752.9,-385.6 752.9,-379.6 752.9,-379.6 752.9,-340 752.9,-340 752.9,-334 758.9,-328 764.9,-328 764.9,-328 830.75,-328 830.75,-328 
836.75,-328 842.75,-334 842.75,-340 842.75,-340 842.75,-379.6 842.75,-379.6 842.75,-385.6 836.75,-391.6 830.75,-391.6\"/>\n<text text-anchor=\"start\" x=\"774.95\" y=\"-368.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_train</text>\n<text text-anchor=\"start\" x=\"763.7\" y=\"-340.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- train_test_split_func->X_train -->\n<g id=\"edge16\" class=\"edge\">\n<title>train_test_split_func->X_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M690.2,-335.89C707.64,-339.8 725.84,-343.88 742.17,-347.54\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"741.08,-351.11 751.6,-349.88 742.61,-344.28 741.08,-351.11\"/>\n</g>\n<!-- y_train -->\n<g id=\"node11\" class=\"node\">\n<title>y_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M818.75,-309.6C818.75,-309.6 776.9,-309.6 776.9,-309.6 770.9,-309.6 764.9,-303.6 764.9,-297.6 764.9,-297.6 764.9,-258 764.9,-258 764.9,-252 770.9,-246 776.9,-246 776.9,-246 818.75,-246 818.75,-246 824.75,-246 830.75,-252 830.75,-258 830.75,-258 830.75,-297.6 830.75,-297.6 830.75,-303.6 824.75,-309.6 818.75,-309.6\"/>\n<text text-anchor=\"start\" x=\"775.7\" y=\"-286.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_train</text>\n<text text-anchor=\"start\" x=\"778.7\" y=\"-258.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func->y_train -->\n<g id=\"edge17\" class=\"edge\">\n<title>train_test_split_func->y_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M690.2,-301.71C711.99,-296.82 734.96,-291.67 753.98,-287.41\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"754.69,-290.61 763.68,-285.01 753.16,-283.78 754.69,-290.61\"/>\n</g>\n<!-- training_set_v1->train_test_split_func -->\n<g id=\"edge1\" class=\"edge\">\n<title>training_set_v1->train_test_split_func</title>\n<path 
fill=\"none\" stroke=\"red\" d=\"M467.46,-318.8C485.44,-318.8 505.57,-318.8 524.96,-318.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"524.95,-322.3 534.95,-318.8 524.95,-315.3 524.95,-322.3\"/>\n</g>\n<!-- cabin_category -->\n<g id=\"node4\" class=\"node\">\n<title>cabin_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M250.95,-227.6C250.95,-227.6 154.35,-227.6 154.35,-227.6 148.35,-227.6 142.35,-221.6 142.35,-215.6 142.35,-215.6 142.35,-176 142.35,-176 142.35,-170 148.35,-164 154.35,-164 154.35,-164 250.95,-164 250.95,-164 256.95,-164 262.95,-170 262.95,-176 262.95,-176 262.95,-215.6 262.95,-215.6 262.95,-221.6 256.95,-227.6 250.95,-227.6\"/>\n<text text-anchor=\"start\" x=\"153.15\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_category</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- cabin_category->training_set_v1 -->\n<g id=\"edge7\" class=\"edge\">\n<title>cabin_category->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M263.39,-225.74C268.39,-229.19 273.2,-232.88 277.58,-236.8 294.2,-251.7 289.13,-263.88 306.58,-277.8 315.51,-284.92 325.85,-290.99 336.41,-296.09\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.59,-299.56 345.15,-300.5 337.49,-293.18 334.59,-299.56\"/>\n</g>\n<!-- sex_category -->\n<g id=\"node5\" class=\"node\">\n<title>sex_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M244.58,-145.6C244.58,-145.6 160.73,-145.6 160.73,-145.6 154.73,-145.6 148.73,-139.6 148.73,-133.6 148.73,-133.6 148.73,-94 148.73,-94 148.73,-88 154.73,-82 160.73,-82 160.73,-82 244.58,-82 244.58,-82 250.58,-82 256.58,-88 256.58,-94 256.58,-94 256.58,-133.6 256.58,-133.6 256.58,-139.6 250.58,-145.6 244.58,-145.6\"/>\n<text text-anchor=\"start\" x=\"159.53\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" 
font-size=\"14.00\">sex_category</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- sex_category->training_set_v1 -->\n<g id=\"edge8\" class=\"edge\">\n<title>sex_category->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M256.83,-136C264.68,-141.2 271.99,-147.43 277.58,-154.8 311.48,-199.58 270.16,-235.04 306.58,-277.8 314.57,-287.19 325.06,-294.47 336.23,-300.09\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.61,-303.66 345.16,-304.62 337.52,-297.29 334.61,-303.66\"/>\n</g>\n<!-- target -->\n<g id=\"node6\" class=\"node\">\n<title>target</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M220.58,-637.6C220.58,-637.6 184.73,-637.6 184.73,-637.6 178.73,-637.6 172.73,-631.6 172.73,-625.6 172.73,-625.6 172.73,-586 172.73,-586 172.73,-580 178.73,-574 184.73,-574 184.73,-574 220.58,-574 220.58,-574 226.58,-574 232.58,-580 232.58,-586 232.58,-586 232.58,-625.6 232.58,-625.6 232.58,-631.6 226.58,-637.6 220.58,-637.6\"/>\n<text text-anchor=\"start\" x=\"183.9\" y=\"-614.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">target</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-586.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- target->train_test_split_func -->\n<g id=\"edge2\" class=\"edge\">\n<title>target->train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.94,-590.67C246.72,-583.27 263.25,-573.99 277.58,-564.8 383.1,-497.12 499.6,-407.84 563.45,-357.56\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"565.05,-359.97 570.73,-351.03 560.71,-354.48 565.05,-359.97\"/>\n</g>\n<!-- fit_random_forest -->\n<g id=\"node7\" class=\"node\">\n<title>fit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M1030.98,-309.6C1030.98,-309.6 917.88,-309.6 917.88,-309.6 911.88,-309.6 905.88,-303.6 
905.88,-297.6 905.88,-297.6 905.88,-258 905.88,-258 905.88,-252 911.88,-246 917.88,-246 917.88,-246 1030.98,-246 1030.98,-246 1036.98,-246 1042.98,-252 1042.98,-258 1042.98,-258 1042.98,-297.6 1042.98,-297.6 1042.98,-303.6 1036.98,-309.6 1030.98,-309.6\"/>\n<text text-anchor=\"start\" x=\"916.68\" y=\"-286.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_random_forest</text>\n<text text-anchor=\"start\" x=\"930.55\" y=\"-258.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- titanic_data -->\n<g id=\"node8\" class=\"node\">\n<title>titanic_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M86.73,-432.6C86.73,-432.6 14.13,-432.6 14.13,-432.6 8.13,-432.6 2.13,-426.6 2.13,-420.6 2.13,-420.6 2.13,-381 2.13,-381 2.13,-375 8.13,-369 14.13,-369 14.13,-369 86.73,-369 86.73,-369 92.73,-369 98.73,-375 98.73,-381 98.73,-381 98.73,-420.6 98.73,-420.6 98.73,-426.6 92.73,-432.6 86.73,-432.6\"/>\n<text text-anchor=\"start\" x=\"12.93\" y=\"-409.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">titanic_data</text>\n<text text-anchor=\"start\" x=\"16.3\" y=\"-381.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- titanic_data->target -->\n<g id=\"edge11\" class=\"edge\">\n<title>titanic_data->target</title>\n<path fill=\"none\" stroke=\"black\" d=\"M94.65,-433.09C96.12,-434.93 97.49,-436.84 98.73,-438.8 129.25,-487.48 93.16,-518.89 127.73,-564.8 136.59,-576.57 149.87,-585.42 162.74,-591.82\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"160.88,-595.27 171.43,-596.25 163.79,-588.91 160.88,-595.27\"/>\n</g>\n<!-- age -->\n<g id=\"node9\" class=\"node\">\n<title>age</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M220.58,-473.6C220.58,-473.6 184.73,-473.6 184.73,-473.6 178.73,-473.6 172.73,-467.6 172.73,-461.6 172.73,-461.6 172.73,-422 172.73,-422 172.73,-416 178.73,-410 
184.73,-410 184.73,-410 220.58,-410 220.58,-410 226.58,-410 232.58,-416 232.58,-422 232.58,-422 232.58,-461.6 232.58,-461.6 232.58,-467.6 226.58,-473.6 220.58,-473.6\"/>\n<text text-anchor=\"start\" x=\"191.03\" y=\"-450.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">age</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-422.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->age -->\n<g id=\"edge15\" class=\"edge\">\n<title>titanic_data->age</title>\n<path fill=\"none\" stroke=\"black\" d=\"M98.96,-413.77C119.17,-419.29 142.48,-425.65 161.78,-430.92\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"160.76,-434.54 171.33,-433.8 162.6,-427.79 160.76,-434.54\"/>\n</g>\n<!-- pclass -->\n<g id=\"node14\" class=\"node\">\n<title>pclass</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M222.45,-391.6C222.45,-391.6 182.85,-391.6 182.85,-391.6 176.85,-391.6 170.85,-385.6 170.85,-379.6 170.85,-379.6 170.85,-340 170.85,-340 170.85,-334 176.85,-328 182.85,-328 182.85,-328 222.45,-328 222.45,-328 228.45,-328 234.45,-334 234.45,-340 234.45,-340 234.45,-379.6 234.45,-379.6 234.45,-385.6 228.45,-391.6 222.45,-391.6\"/>\n<text text-anchor=\"start\" x=\"181.65\" y=\"-368.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">pclass</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-340.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->pclass -->\n<g id=\"edge19\" class=\"edge\">\n<title>titanic_data->pclass</title>\n<path fill=\"none\" stroke=\"black\" d=\"M98.96,-387.83C118.59,-382.47 141.15,-376.31 160.11,-371.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"160.8,-374.3 169.52,-368.3 158.95,-367.55 160.8,-374.3\"/>\n</g>\n<!-- fare -->\n<g id=\"node15\" class=\"node\">\n<title>fare</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" 
d=\"M220.58,-309.6C220.58,-309.6 184.73,-309.6 184.73,-309.6 178.73,-309.6 172.73,-303.6 172.73,-297.6 172.73,-297.6 172.73,-258 172.73,-258 172.73,-252 178.73,-246 184.73,-246 184.73,-246 220.58,-246 220.58,-246 226.58,-246 232.58,-252 232.58,-258 232.58,-258 232.58,-297.6 232.58,-297.6 232.58,-303.6 226.58,-309.6 220.58,-309.6\"/>\n<text text-anchor=\"start\" x=\"190.28\" y=\"-286.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fare</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-258.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data->fare -->\n<g id=\"edge20\" class=\"edge\">\n<title>titanic_data->fare</title>\n<path fill=\"none\" stroke=\"black\" d=\"M77.24,-368.54C91.13,-352.41 109.19,-333.3 127.73,-318.8 138.38,-310.46 150.95,-302.81 162.67,-296.44\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"164.03,-299.15 171.26,-291.4 160.78,-292.95 164.03,-299.15\"/>\n</g>\n<!-- age->training_set_v1 -->\n<g id=\"edge5\" class=\"edge\">\n<title>age->training_set_v1</title>\n<path fill=\"none\" stroke=\"red\" d=\"M233.03,-426.05C246.69,-418.59 263.08,-409.44 277.58,-400.8 300.82,-386.94 325.99,-371.06 347.83,-356.98\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"349.48,-359.44 355.97,-351.07 345.68,-353.56 349.48,-359.44\"/>\n</g>\n<!-- X_train->fit_random_forest -->\n<g id=\"edge13\" class=\"edge\">\n<title>X_train->fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M842.88,-339.11C858.98,-331.55 877.71,-322.75 895.86,-314.23\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"897.01,-317.08 904.58,-309.66 894.04,-310.75 897.01,-317.08\"/>\n</g>\n<!-- y_train->fit_random_forest -->\n<g id=\"edge14\" class=\"edge\">\n<title>y_train->fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M830.85,-277.8C848.79,-277.8 872.01,-277.8 894.49,-277.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"894.44,-281.3 904.44,-277.8 
894.44,-274.3 894.44,-281.3\"/>\n</g>\n<!-- family -->\n<g id=\"node12\" class=\"node\">\n<title>family</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M221.33,-63.6C221.33,-63.6 183.98,-63.6 183.98,-63.6 177.98,-63.6 171.98,-57.6 171.98,-51.6 171.98,-51.6 171.98,-12 171.98,-12 171.98,-6 177.98,0 183.98,0 183.98,0 221.33,0 221.33,0 227.33,0 233.33,-6 233.33,-12 233.33,-12 233.33,-51.6 233.33,-51.6 233.33,-57.6 227.33,-63.6 221.33,-63.6\"/>\n<text text-anchor=\"start\" x=\"182.78\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">family</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- family->training_set_v1 -->\n<g id=\"edge10\" class=\"edge\">\n<title>family->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M233.75,-41.15C249.31,-47.56 267.18,-57.68 277.58,-72.8 329.72,-148.62 250.17,-205.1 306.58,-277.8 314.35,-287.82 324.99,-295.4 336.44,-301.15\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.64,-304.63 345.19,-305.55 337.52,-298.24 334.64,-304.63\"/>\n</g>\n<!-- prefit_random_forest -->\n<g id=\"node13\" class=\"node\">\n<title>prefit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M864.88,-227.6C864.88,-227.6 730.78,-227.6 730.78,-227.6 724.78,-227.6 718.78,-221.6 718.78,-215.6 718.78,-215.6 718.78,-176 718.78,-176 718.78,-170 724.78,-164 730.78,-164 730.78,-164 864.88,-164 864.88,-164 870.88,-164 876.88,-170 876.88,-176 876.88,-176 876.88,-215.6 876.88,-215.6 876.88,-221.6 870.88,-227.6 864.88,-227.6\"/>\n<text text-anchor=\"start\" x=\"729.58\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">prefit_random_forest</text>\n<text text-anchor=\"start\" x=\"753.95\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- 
prefit_random_forest->fit_random_forest -->\n<g id=\"edge12\" class=\"edge\">\n<title>prefit_random_forest->fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M867.58,-228.09C876.86,-232.45 886.43,-236.94 895.85,-241.37\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"894.05,-244.86 904.59,-245.94 897.03,-238.52 894.05,-244.86\"/>\n</g>\n<!-- pclass->training_set_v1 -->\n<g id=\"edge4\" class=\"edge\">\n<title>pclass->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M234.8,-353.48C261.61,-348.05 301.29,-340 335.69,-333.03\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"336.06,-336.32 345.17,-330.9 334.67,-329.46 336.06,-336.32\"/>\n</g>\n<!-- fare->training_set_v1 -->\n<g id=\"edge6\" class=\"edge\">\n<title>fare->training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M233.06,-283.76C259.86,-289.2 300.51,-297.44 335.66,-304.56\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.87,-308.18 345.37,-306.74 336.26,-301.32 334.87,-308.18\"/>\n</g>\n<!-- _train_test_split_func_inputs -->\n<g id=\"node16\" class=\"node\">\n<title>_train_test_split_func_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"507.18,-268.6 306.58,-268.6 306.58,-203 507.18,-203 507.18,-268.6\"/>\n<text text-anchor=\"start\" x=\"349.75\" y=\"-240.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"472.75\" y=\"-240.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n<text text-anchor=\"start\" x=\"321.25\" y=\"-219.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">validation_size_fraction</text>\n<text text-anchor=\"start\" x=\"467.13\" y=\"-219.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">float</text>\n</g>\n<!-- _train_test_split_func_inputs->train_test_split_func -->\n<g id=\"edge3\" class=\"edge\">\n<title>_train_test_split_func_inputs->train_test_split_func</title>\n<path 
fill=\"none\" stroke=\"black\" d=\"M489.71,-269.08C501.58,-273.91 513.83,-278.89 525.79,-283.75\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"524.16,-287.27 534.74,-287.8 526.8,-280.79 524.16,-287.27\"/>\n</g>\n<!-- _prefit_random_forest_inputs -->\n<g id=\"node17\" class=\"node\">\n<title>_prefit_random_forest_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"679.53,-218.1 546.43,-218.1 546.43,-173.5 679.53,-173.5 679.53,-218.1\"/>\n<text text-anchor=\"start\" x=\"561.23\" y=\"-190\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"650.48\" y=\"-190\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n</g>\n<!-- _prefit_random_forest_inputs->prefit_random_forest -->\n<g id=\"edge18\" class=\"edge\">\n<title>_prefit_random_forest_inputs->prefit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M679.99,-195.8C688.96,-195.8 698.28,-195.8 707.58,-195.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"707.45,-199.3 717.45,-195.8 707.45,-192.3 707.45,-199.3\"/>\n</g>\n<!-- input -->\n<g id=\"node18\" class=\"node\">\n<title>input</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"77.43,-542.1 23.43,-542.1 23.43,-505.5 77.43,-505.5 77.43,-542.1\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-518\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node19\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M72.85,-487.1C72.85,-487.1 28,-487.1 28,-487.1 22,-487.1 16,-481.1 16,-475.1 16,-475.1 16,-462.5 16,-462.5 16,-456.5 22,-450.5 28,-450.5 28,-450.5 72.85,-450.5 72.85,-450.5 78.85,-450.5 84.85,-456.5 84.85,-462.5 84.85,-462.5 84.85,-475.1 84.85,-475.1 84.85,-481.1 78.85,-487.1 72.85,-487.1\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-463\" font-family=\"Helvetica,sans-Serif\" 
font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n", |
| "text/plain": "<graphviz.graphs.Digraph at 0x13f6092e0>" |
| }, |
| "execution_count": 14, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Visualize a particular path:\n", |
| "dr.visualize_path_between(\"age\", \"fit_random_forest\") # pass in a path if you want to save the image.\n" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:52:35.938980Z", |
| "start_time": "2023-11-07T23:52:35.127130Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 9, |
| "outputs": [ |
| { |
| "data": { |
| "image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"710pt\" height=\"206pt\"\n viewBox=\"0.00 0.00 710.00 205.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 201.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-201.8 706,-201.8 706,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"8,-114.8 8,-189.8 92.85,-189.8 92.85,-114.8 8,-114.8\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-172.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- train_test_split_func -->\n<g id=\"node1\" class=\"node\">\n<title>train_test_split_func</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M405.05,-104.6C405.05,-104.6 275.45,-104.6 275.45,-104.6 269.45,-104.6 263.45,-98.6 263.45,-92.6 263.45,-92.6 263.45,-53 263.45,-53 263.45,-47 269.45,-41 275.45,-41 275.45,-41 405.05,-41 405.05,-41 411.05,-41 417.05,-47 417.05,-53 417.05,-53 417.05,-92.6 417.05,-92.6 417.05,-98.6 411.05,-104.6 405.05,-104.6\"/>\n<text text-anchor=\"start\" x=\"274.25\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">train_test_split_func</text>\n<text text-anchor=\"start\" x=\"329.75\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- X_train -->\n<g id=\"node5\" class=\"node\">\n<title>X_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M523.9,-145.6C523.9,-145.6 458.05,-145.6 458.05,-145.6 452.05,-145.6 446.05,-139.6 446.05,-133.6 446.05,-133.6 446.05,-94 446.05,-94 446.05,-88 452.05,-82 458.05,-82 458.05,-82 523.9,-82 
523.9,-82 529.9,-82 535.9,-88 535.9,-94 535.9,-94 535.9,-133.6 535.9,-133.6 535.9,-139.6 529.9,-145.6 523.9,-145.6\"/>\n<text text-anchor=\"start\" x=\"468.1\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_train</text>\n<text text-anchor=\"start\" x=\"456.85\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- train_test_split_func->X_train -->\n<g id=\"edge5\" class=\"edge\">\n<title>train_test_split_func->X_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M417.36,-93.78C423.43,-95.46 429.49,-97.13 435.37,-98.75\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"434.16,-102.32 444.73,-101.6 436.02,-95.57 434.16,-102.32\"/>\n</g>\n<!-- y_train -->\n<g id=\"node6\" class=\"node\">\n<title>y_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M511.9,-63.6C511.9,-63.6 470.05,-63.6 470.05,-63.6 464.05,-63.6 458.05,-57.6 458.05,-51.6 458.05,-51.6 458.05,-12 458.05,-12 458.05,-6 464.05,0 470.05,0 470.05,0 511.9,0 511.9,0 517.9,0 523.9,-6 523.9,-12 523.9,-12 523.9,-51.6 523.9,-51.6 523.9,-57.6 517.9,-63.6 511.9,-63.6\"/>\n<text text-anchor=\"start\" x=\"468.85\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_train</text>\n<text text-anchor=\"start\" x=\"471.85\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func->y_train -->\n<g id=\"edge6\" class=\"edge\">\n<title>train_test_split_func->y_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M417.36,-51.82C427.74,-48.96 438.08,-46.11 447.56,-43.49\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"448.15,-46.68 456.86,-40.65 446.29,-39.94 448.15,-46.68\"/>\n</g>\n<!-- training_set_v1 -->\n<g id=\"node2\" class=\"node\">\n<title>training_set_v1</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M222.45,-104.6C222.45,-104.6 125.85,-104.6 125.85,-104.6 119.85,-104.6 
113.85,-98.6 113.85,-92.6 113.85,-92.6 113.85,-53 113.85,-53 113.85,-47 119.85,-41 125.85,-41 125.85,-41 222.45,-41 222.45,-41 228.45,-41 234.45,-47 234.45,-53 234.45,-53 234.45,-92.6 234.45,-92.6 234.45,-98.6 228.45,-104.6 222.45,-104.6\"/>\n<text text-anchor=\"start\" x=\"124.65\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_set_v1</text>\n<text text-anchor=\"start\" x=\"140.02\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- training_set_v1->train_test_split_func -->\n<g id=\"edge1\" class=\"edge\">\n<title>training_set_v1->train_test_split_func</title>\n<path fill=\"none\" stroke=\"red\" d=\"M234.86,-72.8C240.59,-72.8 246.47,-72.8 252.39,-72.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"252.05,-76.3 262.05,-72.8 252.05,-69.3 252.05,-76.3\"/>\n</g>\n<!-- fit_random_forest -->\n<g id=\"node3\" class=\"node\">\n<title>fit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M690,-104.6C690,-104.6 576.9,-104.6 576.9,-104.6 570.9,-104.6 564.9,-98.6 564.9,-92.6 564.9,-92.6 564.9,-53 564.9,-53 564.9,-47 570.9,-41 576.9,-41 576.9,-41 690,-41 690,-41 696,-41 702,-47 702,-53 702,-53 702,-92.6 702,-92.6 702,-98.6 696,-104.6 690,-104.6\"/>\n<text text-anchor=\"start\" x=\"575.7\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_random_forest</text>\n<text text-anchor=\"start\" x=\"589.58\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- age -->\n<g id=\"node4\" class=\"node\">\n<title>age</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M68.35,-104.6C68.35,-104.6 32.5,-104.6 32.5,-104.6 26.5,-104.6 20.5,-98.6 20.5,-92.6 20.5,-92.6 20.5,-53 20.5,-53 20.5,-47 26.5,-41 32.5,-41 32.5,-41 68.35,-41 68.35,-41 74.35,-41 80.35,-47 80.35,-53 80.35,-53 80.35,-92.6 80.35,-92.6 80.35,-98.6 74.35,-104.6 
68.35,-104.6\"/>\n<text text-anchor=\"start\" x=\"38.8\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">age</text>\n<text text-anchor=\"start\" x=\"31.3\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- age->training_set_v1 -->\n<g id=\"edge2\" class=\"edge\">\n<title>age->training_set_v1</title>\n<path fill=\"none\" stroke=\"red\" d=\"M80.58,-72.8C87.45,-72.8 95.06,-72.8 102.89,-72.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"102.64,-76.3 112.64,-72.8 102.64,-69.3 102.64,-76.3\"/>\n</g>\n<!-- X_train->fit_random_forest -->\n<g id=\"edge3\" class=\"edge\">\n<title>X_train->fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M536.04,-100.94C541.86,-99.24 547.97,-97.46 554.16,-95.65\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"555,-98.76 563.62,-92.6 553.04,-92.04 555,-98.76\"/>\n</g>\n<!-- y_train->fit_random_forest -->\n<g id=\"edge4\" class=\"edge\">\n<title>y_train->fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M524.2,-41.21C533.25,-43.85 543.51,-46.84 554.02,-49.91\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"552.99,-53.55 563.57,-52.99 554.95,-46.83 552.99,-53.55\"/>\n</g>\n<!-- function -->\n<g id=\"node7\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M72.85,-159.1C72.85,-159.1 28,-159.1 28,-159.1 22,-159.1 16,-153.1 16,-147.1 16,-147.1 16,-134.5 16,-134.5 16,-128.5 22,-122.5 28,-122.5 28,-122.5 72.85,-122.5 72.85,-122.5 78.85,-122.5 84.85,-128.5 84.85,-134.5 84.85,-134.5 84.85,-147.1 84.85,-147.1 84.85,-153.1 78.85,-159.1 72.85,-159.1\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-135\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n", |
| "text/plain": "<graphviz.graphs.Digraph at 0x13f0667f0>" |
| }, |
| "execution_count": 9, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# To focus on just the path between two nodes, we can do:\n", |
| "dr.visualize_path_between(\"age\", \"fit_random_forest\", strict_path_visualization=True) # pass output_file_path=... if you also want to save the image to a file.\n" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:22.866435Z", |
| "start_time": "2023-11-07T23:50:22.394442Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": "[Variable(name='train_test_split_func', type=typing.Dict[str, typing.Union[pandas.core.frame.DataFrame, pandas.core.series.Series]], tags={'module': 'model_pipeline'}, is_external_input=False, originating_functions=(<function train_test_split_func at 0x13f043040>,)),\n Variable(name='training_set_v1', type=<class 'pandas.core.frame.DataFrame'>, tags={'module': 'sets', 'owner': 'data-science', 'importance': 'production', 'artifact': 'training_set'}, is_external_input=False, originating_functions=(<function training_set_v1 at 0x13f043280>,)),\n Variable(name='fit_random_forest', type=<class 'sklearn.base.ClassifierMixin'>, tags={'module': 'model_pipeline', 'owner': 'data-science', 'importance': 'production', 'artifact': 'model'}, is_external_input=False, originating_functions=(<function fit_random_forest at 0x13f043160>,)),\n Variable(name='age', type=<class 'pandas.core.series.Series'>, tags={'module': 'data_loading', 'PII': 'true'}, is_external_input=False, originating_functions=(<function titanic_data at 0x12073f5e0>,)),\n Variable(name='X_train', type=<class 'pandas.core.frame.DataFrame'>, tags={'module': 'model_pipeline'}, is_external_input=False, originating_functions=(<function train_test_split_func at 0x13f043040>,)),\n Variable(name='y_train', type=<class 'pandas.core.series.Series'>, tags={'module': 'model_pipeline'}, is_external_input=False, originating_functions=(<function train_test_split_func at 0x13f043040>,))]" |
| }, |
| "execution_count": 10, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# To programmatically get the nodes on the path between two nodes:\n", |
| "nodes_in_path = dr.what_is_the_path_between(\"age\", \"fit_random_forest\")\n", |
| "nodes_in_path" |
| ], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T23:50:23.082151Z", |
| "start_time": "2023-11-07T23:50:23.009952Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "outputs": [], |
| "source": [], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T02:19:27.919791Z", |
| "start_time": "2023-11-07T02:19:27.882617Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "outputs": [], |
| "source": [], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T02:19:27.934486Z", |
| "start_time": "2023-11-07T02:19:27.890850Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 10, |
| "outputs": [], |
| "source": [], |
| "metadata": { |
| "collapsed": false, |
| "ExecuteTime": { |
| "end_time": "2023-11-07T02:19:27.934943Z", |
| "start_time": "2023-11-07T02:19:27.898178Z" |
| } |
| } |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "outputs": [], |
| "source": [], |
| "metadata": { |
| "collapsed": false |
| } |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.6" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 0 |
| } |