blob: c9296d47799b0e81ecf9cb1e490b0a9ec5e30d42 [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Lineage Code Snippets\n",
"Here is a notebook that shows you code snippets that you can use for lineage purposes. It uses the Hamilton code operating over the Titanic dataset for demonstration purposes."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"from hamilton import base\n",
"from hamilton import driver\n",
"\n",
"# modules that house the Hamilton code we build the lineage/DAG from.\n",
"import data_loading\n",
"import features\n",
"import model_pipeline\n",
"import sets\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:19.946421Z",
"start_time": "2023-11-07T23:50:14.877286Z"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n"
]
}
],
"source": [
"# Determine configuration for creating the DAG.\n",
"config = {} # This example has no configuration that changes the DAG/lineage shape.\n",
"# instantiate the driver\n",
"adapter = base.DefaultAdapter()\n",
"dr = driver.Driver(config, data_loading, features, sets, model_pipeline, adapter=adapter)\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:19.981600Z",
"start_time": "2023-11-07T23:50:19.955048Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"# Display everything\n",
"Useful for a global overview."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"1544pt\" height=\"1091pt\"\n viewBox=\"0.00 0.00 1543.90 1090.60\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 1086.6)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-1086.6 1539.9,-1086.6 1539.9,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"11.38,-601.8 11.38,-731.8 96.22,-731.8 96.22,-601.8 11.38,-601.8\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-714.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- embarked_category -->\n<g id=\"node1\" class=\"node\">\n<title>embarked_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M718.5,-279.6C718.5,-279.6 592.65,-279.6 592.65,-279.6 586.65,-279.6 580.65,-273.6 580.65,-267.6 580.65,-267.6 580.65,-228 580.65,-228 580.65,-222 586.65,-216 592.65,-216 592.65,-216 718.5,-216 718.5,-216 724.5,-216 730.5,-222 730.5,-228 730.5,-228 730.5,-267.6 730.5,-267.6 730.5,-273.6 724.5,-279.6 718.5,-279.6\"/>\n<text text-anchor=\"start\" x=\"591.45\" y=\"-256.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_category</text>\n<text text-anchor=\"start\" x=\"636.45\" y=\"-228.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- training_set_v1 -->\n<g id=\"node17\" class=\"node\">\n<title>training_set_v1</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M908.1,-361.6C908.1,-361.6 811.5,-361.6 811.5,-361.6 805.5,-361.6 799.5,-355.6 799.5,-349.6 799.5,-349.6 799.5,-310 799.5,-310 799.5,-304 805.5,-298 
811.5,-298 811.5,-298 908.1,-298 908.1,-298 914.1,-298 920.1,-304 920.1,-310 920.1,-310 920.1,-349.6 920.1,-349.6 920.1,-355.6 914.1,-361.6 908.1,-361.6\"/>\n<text text-anchor=\"start\" x=\"810.3\" y=\"-338.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_set_v1</text>\n<text text-anchor=\"start\" x=\"825.67\" y=\"-310.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- embarked_category&#45;&gt;training_set_v1 -->\n<g id=\"edge30\" class=\"edge\">\n<title>embarked_category&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M730.73,-277.87C749.84,-285.62 770.33,-293.93 789.2,-301.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"787.62,-305.12 798.2,-305.63 790.25,-298.63 787.62,-305.12\"/>\n</g>\n<!-- cabin_t -->\n<g id=\"node2\" class=\"node\">\n<title>cabin_t</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M501.02,-530.6C501.02,-530.6 456.17,-530.6 456.17,-530.6 450.17,-530.6 444.17,-524.6 444.17,-518.6 444.17,-518.6 444.17,-479 444.17,-479 444.17,-473 450.17,-467 456.17,-467 456.17,-467 501.02,-467 501.02,-467 507.02,-467 513.02,-473 513.02,-479 513.02,-479 513.02,-518.6 513.02,-518.6 513.02,-524.6 507.02,-530.6 501.02,-530.6\"/>\n<text text-anchor=\"start\" x=\"454.97\" y=\"-507.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_t</text>\n<text text-anchor=\"start\" x=\"459.47\" y=\"-479.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- target -->\n<g id=\"node3\" class=\"node\">\n<title>target</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-1082.6C343.92,-1082.6 308.07,-1082.6 308.07,-1082.6 302.07,-1082.6 296.07,-1076.6 296.07,-1070.6 296.07,-1070.6 296.07,-1031 296.07,-1031 296.07,-1025 302.07,-1019 308.07,-1019 308.07,-1019 343.92,-1019 343.92,-1019 349.92,-1019 355.92,-1025 355.92,-1031 355.92,-1031 
355.92,-1070.6 355.92,-1070.6 355.92,-1076.6 349.92,-1082.6 343.92,-1082.6\"/>\n<text text-anchor=\"start\" x=\"307.25\" y=\"-1059.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">target</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-1031.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func -->\n<g id=\"node10\" class=\"node\">\n<title>train_test_split_func</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1150.7,-361.6C1150.7,-361.6 1021.1,-361.6 1021.1,-361.6 1015.1,-361.6 1009.1,-355.6 1009.1,-349.6 1009.1,-349.6 1009.1,-310 1009.1,-310 1009.1,-304 1015.1,-298 1021.1,-298 1021.1,-298 1150.7,-298 1150.7,-298 1156.7,-298 1162.7,-304 1162.7,-310 1162.7,-310 1162.7,-349.6 1162.7,-349.6 1162.7,-355.6 1156.7,-361.6 1150.7,-361.6\"/>\n<text text-anchor=\"start\" x=\"1019.9\" y=\"-338.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">train_test_split_func</text>\n<text text-anchor=\"start\" x=\"1075.4\" y=\"-310.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- target&#45;&gt;train_test_split_func -->\n<g id=\"edge15\" class=\"edge\">\n<title>target&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.23,-1051.85C401.43,-1052.14 489.54,-1047.3 551.65,-1009.8 821.8,-846.71 1008.01,-492.57 1065.84,-371.42\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1069.3,-373.3 1070.41,-362.76 1062.97,-370.3 1069.3,-373.3\"/>\n</g>\n<!-- titanic_data -->\n<g id=\"node4\" class=\"node\">\n<title>titanic_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M227.82,-590.6C227.82,-590.6 155.22,-590.6 155.22,-590.6 149.22,-590.6 143.22,-584.6 143.22,-578.6 143.22,-578.6 143.22,-539 143.22,-539 143.22,-533 149.22,-527 155.22,-527 155.22,-527 227.82,-527 227.82,-527 233.82,-527 239.82,-533 239.82,-539 239.82,-539 239.82,-578.6 239.82,-578.6 
239.82,-584.6 233.82,-590.6 227.82,-590.6\"/>\n<text text-anchor=\"start\" x=\"154.02\" y=\"-567.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">titanic_data</text>\n<text text-anchor=\"start\" x=\"157.4\" y=\"-539.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- titanic_data&#45;&gt;target -->\n<g id=\"edge4\" class=\"edge\">\n<title>titanic_data&#45;&gt;target</title>\n<path fill=\"none\" stroke=\"black\" d=\"M195.56,-590.92C204.58,-681.63 233.24,-937.88 275.45,-1009.8 278.67,-1015.29 282.92,-1020.37 287.57,-1024.95\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.88,-1027.13 294.67,-1031.16 289.53,-1021.9 284.88,-1027.13\"/>\n</g>\n<!-- parch -->\n<g id=\"node6\" class=\"node\">\n<title>parch</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-672.6C343.92,-672.6 308.07,-672.6 308.07,-672.6 302.07,-672.6 296.07,-666.6 296.07,-660.6 296.07,-660.6 296.07,-621 296.07,-621 296.07,-615 302.07,-609 308.07,-609 308.07,-609 343.92,-609 343.92,-609 349.92,-609 355.92,-615 355.92,-621 355.92,-621 355.92,-660.6 355.92,-660.6 355.92,-666.6 349.92,-672.6 343.92,-672.6\"/>\n<text text-anchor=\"start\" x=\"307.62\" y=\"-649.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">parch</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-621.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;parch -->\n<g id=\"edge8\" class=\"edge\">\n<title>titanic_data&#45;&gt;parch</title>\n<path fill=\"none\" stroke=\"black\" d=\"M239.98,-588.17C255.2,-597.6 271.87,-607.91 286.39,-616.9\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.23,-620.3 294.58,-622.59 287.91,-614.35 284.23,-620.3\"/>\n</g>\n<!-- name -->\n<g id=\"node13\" class=\"node\">\n<title>name</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-63.6C343.92,-63.6 308.07,-63.6 308.07,-63.6 
302.07,-63.6 296.07,-57.6 296.07,-51.6 296.07,-51.6 296.07,-12 296.07,-12 296.07,-6 302.07,0 308.07,0 308.07,0 343.92,0 343.92,0 349.92,0 355.92,-6 355.92,-12 355.92,-12 355.92,-51.6 355.92,-51.6 355.92,-57.6 349.92,-63.6 343.92,-63.6\"/>\n<text text-anchor=\"start\" x=\"308\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">name</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;name -->\n<g id=\"edge21\" class=\"edge\">\n<title>titanic_data&#45;&gt;name</title>\n<path fill=\"none\" stroke=\"black\" d=\"M195.09,-526.61C203.16,-431.1 230.19,-151.07 275.45,-72.8 278.64,-67.29 282.86,-62.2 287.5,-57.62\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"289.47,-60.66 294.6,-51.4 284.81,-55.44 289.47,-60.66\"/>\n</g>\n<!-- pclass -->\n<g id=\"node14\" class=\"node\">\n<title>pclass</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M345.8,-145.6C345.8,-145.6 306.2,-145.6 306.2,-145.6 300.2,-145.6 294.2,-139.6 294.2,-133.6 294.2,-133.6 294.2,-94 294.2,-94 294.2,-88 300.2,-82 306.2,-82 306.2,-82 345.8,-82 345.8,-82 351.8,-82 357.8,-88 357.8,-94 357.8,-94 357.8,-133.6 357.8,-133.6 357.8,-139.6 351.8,-145.6 345.8,-145.6\"/>\n<text text-anchor=\"start\" x=\"305\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">pclass</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;pclass -->\n<g id=\"edge22\" class=\"edge\">\n<title>titanic_data&#45;&gt;pclass</title>\n<path fill=\"none\" stroke=\"black\" d=\"M192.11,-526.61C192.59,-454.64 201.79,-277.4 275.45,-154.8 278.29,-150.08 281.87,-145.65 285.81,-141.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"287.79,-144.59 292.72,-135.22 283.02,-139.47 
287.79,-144.59\"/>\n</g>\n<!-- ticket -->\n<g id=\"node18\" class=\"node\">\n<title>ticket</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-1000.6C343.92,-1000.6 308.07,-1000.6 308.07,-1000.6 302.07,-1000.6 296.07,-994.6 296.07,-988.6 296.07,-988.6 296.07,-949 296.07,-949 296.07,-943 302.07,-937 308.07,-937 308.07,-937 343.92,-937 343.92,-937 349.92,-937 355.92,-943 355.92,-949 355.92,-949 355.92,-988.6 355.92,-988.6 355.92,-994.6 349.92,-1000.6 343.92,-1000.6\"/>\n<text text-anchor=\"start\" x=\"308.37\" y=\"-977.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">ticket</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-949.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;ticket -->\n<g id=\"edge32\" class=\"edge\">\n<title>titanic_data&#45;&gt;ticket</title>\n<path fill=\"none\" stroke=\"black\" d=\"M192.93,-591C195.16,-658.47 207.81,-817.55 275.45,-927.8 278.74,-933.17 282.99,-938.16 287.61,-942.68\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.83,-944.79 294.63,-948.82 289.49,-939.56 284.83,-944.79\"/>\n</g>\n<!-- cabin -->\n<g id=\"node20\" class=\"node\">\n<title>cabin</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-426.6C343.92,-426.6 308.07,-426.6 308.07,-426.6 302.07,-426.6 296.07,-420.6 296.07,-414.6 296.07,-414.6 296.07,-375 296.07,-375 296.07,-369 302.07,-363 308.07,-363 308.07,-363 343.92,-363 343.92,-363 349.92,-363 355.92,-369 355.92,-375 355.92,-375 355.92,-414.6 355.92,-414.6 355.92,-420.6 349.92,-426.6 343.92,-426.6\"/>\n<text text-anchor=\"start\" x=\"308.37\" y=\"-403.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-375.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;cabin -->\n<g id=\"edge35\" 
class=\"edge\">\n<title>titanic_data&#45;&gt;cabin</title>\n<path fill=\"none\" stroke=\"black\" d=\"M210.68,-526.68C226.14,-500.74 249.85,-464.04 275.45,-435.8 279.15,-431.72 283.27,-427.68 287.5,-423.83\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"289.46,-425.9 294.71,-416.69 284.87,-420.62 289.46,-425.9\"/>\n</g>\n<!-- fare -->\n<g id=\"node24\" class=\"node\">\n<title>fare</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-590.6C343.92,-590.6 308.07,-590.6 308.07,-590.6 302.07,-590.6 296.07,-584.6 296.07,-578.6 296.07,-578.6 296.07,-539 296.07,-539 296.07,-533 302.07,-527 308.07,-527 308.07,-527 343.92,-527 343.92,-527 349.92,-527 355.92,-533 355.92,-539 355.92,-539 355.92,-578.6 355.92,-578.6 355.92,-584.6 349.92,-590.6 343.92,-590.6\"/>\n<text text-anchor=\"start\" x=\"313.62\" y=\"-567.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fare</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-539.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;fare -->\n<g id=\"edge40\" class=\"edge\">\n<title>titanic_data&#45;&gt;fare</title>\n<path fill=\"none\" stroke=\"black\" d=\"M239.98,-558.8C254.62,-558.8 270.6,-558.8 284.72,-558.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.58,-562.3 294.58,-558.8 284.58,-555.3 284.58,-562.3\"/>\n</g>\n<!-- survived -->\n<g id=\"node25\" class=\"node\">\n<title>survived</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M352.55,-918.6C352.55,-918.6 299.45,-918.6 299.45,-918.6 293.45,-918.6 287.45,-912.6 287.45,-906.6 287.45,-906.6 287.45,-867 287.45,-867 287.45,-861 293.45,-855 299.45,-855 299.45,-855 352.55,-855 352.55,-855 358.55,-855 364.55,-861 364.55,-867 364.55,-867 364.55,-906.6 364.55,-906.6 364.55,-912.6 358.55,-918.6 352.55,-918.6\"/>\n<text text-anchor=\"start\" x=\"298.25\" y=\"-895.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" 
font-size=\"14.00\">survived</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-867.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;survived -->\n<g id=\"edge41\" class=\"edge\">\n<title>titanic_data&#45;&gt;survived</title>\n<path fill=\"none\" stroke=\"black\" d=\"M195.6,-591.09C202.22,-647.08 221.84,-764.05 275.45,-845.8 276.9,-848.01 278.5,-850.15 280.21,-852.24\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"276.95,-853.95 286.31,-858.92 282.1,-849.2 276.95,-853.95\"/>\n</g>\n<!-- passengerid -->\n<g id=\"node27\" class=\"node\">\n<title>passengerid</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M364.55,-836.6C364.55,-836.6 287.45,-836.6 287.45,-836.6 281.45,-836.6 275.45,-830.6 275.45,-824.6 275.45,-824.6 275.45,-785 275.45,-785 275.45,-779 281.45,-773 287.45,-773 287.45,-773 364.55,-773 364.55,-773 370.55,-773 376.55,-779 376.55,-785 376.55,-785 376.55,-824.6 376.55,-824.6 376.55,-830.6 370.55,-836.6 364.55,-836.6\"/>\n<text text-anchor=\"start\" x=\"286.25\" y=\"-813.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">passengerid</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-785.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;passengerid -->\n<g id=\"edge44\" class=\"edge\">\n<title>titanic_data&#45;&gt;passengerid</title>\n<path fill=\"none\" stroke=\"black\" d=\"M200.16,-590.74C211.45,-633.26 235.65,-709.44 275.45,-763.8 275.59,-763.99 275.73,-764.18 275.87,-764.37\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"273.77,-766.31 282.9,-771.69 279.12,-761.79 273.77,-766.31\"/>\n</g>\n<!-- embarked -->\n<g id=\"node28\" class=\"node\">\n<title>embarked</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M357.05,-328.6C357.05,-328.6 294.95,-328.6 294.95,-328.6 288.95,-328.6 282.95,-322.6 282.95,-316.6 282.95,-316.6 282.95,-277 282.95,-277 
282.95,-271 288.95,-265 294.95,-265 294.95,-265 357.05,-265 357.05,-265 363.05,-265 369.05,-271 369.05,-277 369.05,-277 369.05,-316.6 369.05,-316.6 369.05,-322.6 363.05,-328.6 357.05,-328.6\"/>\n<text text-anchor=\"start\" x=\"293.75\" y=\"-305.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-277.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;embarked -->\n<g id=\"edge45\" class=\"edge\">\n<title>titanic_data&#45;&gt;embarked</title>\n<path fill=\"none\" stroke=\"black\" d=\"M201.85,-526.7C214.77,-484.99 240.53,-410.75 275.45,-353.8 278.91,-348.15 282.94,-342.51 287.17,-337.1\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"290.33,-339.77 293.94,-329.81 284.91,-335.34 290.33,-339.77\"/>\n</g>\n<!-- age -->\n<g id=\"node30\" class=\"node\">\n<title>age</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-508.6C343.92,-508.6 308.07,-508.6 308.07,-508.6 302.07,-508.6 296.07,-502.6 296.07,-496.6 296.07,-496.6 296.07,-457 296.07,-457 296.07,-451 302.07,-445 308.07,-445 308.07,-445 343.92,-445 343.92,-445 349.92,-445 355.92,-451 355.92,-457 355.92,-457 355.92,-496.6 355.92,-496.6 355.92,-502.6 349.92,-508.6 343.92,-508.6\"/>\n<text text-anchor=\"start\" x=\"314.37\" y=\"-485.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">age</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-457.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;age -->\n<g id=\"edge47\" class=\"edge\">\n<title>titanic_data&#45;&gt;age</title>\n<path fill=\"none\" stroke=\"black\" d=\"M239.98,-529.43C255.2,-520 271.87,-509.69 286.39,-500.7\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"287.91,-503.25 294.58,-495.01 284.23,-497.3 287.91,-503.25\"/>\n</g>\n<!-- sex -->\n<g id=\"node31\" 
class=\"node\">\n<title>sex</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-227.6C343.92,-227.6 308.07,-227.6 308.07,-227.6 302.07,-227.6 296.07,-221.6 296.07,-215.6 296.07,-215.6 296.07,-176 296.07,-176 296.07,-170 302.07,-164 308.07,-164 308.07,-164 343.92,-164 343.92,-164 349.92,-164 355.92,-170 355.92,-176 355.92,-176 355.92,-215.6 355.92,-215.6 355.92,-221.6 349.92,-227.6 343.92,-227.6\"/>\n<text text-anchor=\"start\" x=\"314.75\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;sex -->\n<g id=\"edge48\" class=\"edge\">\n<title>titanic_data&#45;&gt;sex</title>\n<path fill=\"none\" stroke=\"black\" d=\"M196.77,-526.65C205.28,-469.49 227.84,-347.53 275.45,-255.8 279.26,-248.46 284.07,-241.21 289.2,-234.41\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"292.34,-237.09 295.81,-227.08 286.85,-232.74 292.34,-237.09\"/>\n</g>\n<!-- sibsp -->\n<g id=\"node32\" class=\"node\">\n<title>sibsp</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M343.92,-754.6C343.92,-754.6 308.07,-754.6 308.07,-754.6 302.07,-754.6 296.07,-748.6 296.07,-742.6 296.07,-742.6 296.07,-703 296.07,-703 296.07,-697 302.07,-691 308.07,-691 308.07,-691 343.92,-691 343.92,-691 349.92,-691 355.92,-697 355.92,-703 355.92,-703 355.92,-742.6 355.92,-742.6 355.92,-748.6 349.92,-754.6 343.92,-754.6\"/>\n<text text-anchor=\"start\" x=\"308.37\" y=\"-731.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sibsp</text>\n<text text-anchor=\"start\" x=\"306.87\" y=\"-703.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;sibsp -->\n<g id=\"edge49\" class=\"edge\">\n<title>titanic_data&#45;&gt;sibsp</title>\n<path fill=\"none\" stroke=\"black\" 
d=\"M210.68,-590.92C226.14,-616.86 249.85,-653.56 275.45,-681.8 279.15,-685.88 283.27,-689.92 287.5,-693.77\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.87,-696.98 294.71,-700.91 289.46,-691.7 284.87,-696.98\"/>\n</g>\n<!-- embarked_encoder -->\n<g id=\"node5\" class=\"node\">\n<title>embarked_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M539.65,-328.6C539.65,-328.6 417.55,-328.6 417.55,-328.6 411.55,-328.6 405.55,-322.6 405.55,-316.6 405.55,-316.6 405.55,-277 405.55,-277 405.55,-271 411.55,-265 417.55,-265 417.55,-265 539.65,-265 539.65,-265 545.65,-265 551.65,-271 551.65,-277 551.65,-277 551.65,-316.6 551.65,-316.6 551.65,-322.6 545.65,-328.6 539.65,-328.6\"/>\n<text text-anchor=\"start\" x=\"416.35\" y=\"-305.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_encoder</text>\n<text text-anchor=\"start\" x=\"436.97\" y=\"-277.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- embarked_encoder&#45;&gt;embarked_category -->\n<g id=\"edge2\" class=\"edge\">\n<title>embarked_encoder&#45;&gt;embarked_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M552.03,-276.52C557.85,-274.89 563.76,-273.23 569.65,-271.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"570.53,-274.69 579.22,-268.62 568.65,-267.95 570.53,-274.69\"/>\n</g>\n<!-- encoders -->\n<g id=\"node9\" class=\"node\">\n<title>encoders</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M684.37,-361.6C684.37,-361.6 626.77,-361.6 626.77,-361.6 620.77,-361.6 614.77,-355.6 614.77,-349.6 614.77,-349.6 614.77,-310 614.77,-310 614.77,-304 620.77,-298 626.77,-298 626.77,-298 684.37,-298 684.37,-298 690.37,-298 696.37,-304 696.37,-310 696.37,-310 696.37,-349.6 696.37,-349.6 696.37,-355.6 690.37,-361.6 684.37,-361.6\"/>\n<text text-anchor=\"start\" x=\"625.57\" y=\"-338.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">encoders</text>\n<text 
text-anchor=\"start\" x=\"645.07\" y=\"-310.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- embarked_encoder&#45;&gt;encoders -->\n<g id=\"edge13\" class=\"edge\">\n<title>embarked_encoder&#45;&gt;encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M552.03,-310.46C569.62,-313.78 588.04,-317.25 604.34,-320.33\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"603.24,-323.87 613.71,-322.28 604.53,-316.99 603.24,-323.87\"/>\n</g>\n<!-- family -->\n<g id=\"node19\" class=\"node\">\n<title>family</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M497.27,-661.6C497.27,-661.6 459.92,-661.6 459.92,-661.6 453.92,-661.6 447.92,-655.6 447.92,-649.6 447.92,-649.6 447.92,-610 447.92,-610 447.92,-604 453.92,-598 459.92,-598 459.92,-598 497.27,-598 497.27,-598 503.27,-598 509.27,-604 509.27,-610 509.27,-610 509.27,-649.6 509.27,-649.6 509.27,-655.6 503.27,-661.6 497.27,-661.6\"/>\n<text text-anchor=\"start\" x=\"458.72\" y=\"-638.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">family</text>\n<text text-anchor=\"start\" x=\"459.47\" y=\"-610.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- parch&#45;&gt;family -->\n<g id=\"edge34\" class=\"edge\">\n<title>parch&#45;&gt;family</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-638.65C379.29,-636.98 411.33,-634.64 436.72,-632.79\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"436.79,-636.22 446.51,-632 436.28,-629.24 436.79,-636.22\"/>\n</g>\n<!-- X_train -->\n<g id=\"node7\" class=\"node\">\n<title>X_train</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1323.67,-484.6C1323.67,-484.6 1257.83,-484.6 1257.83,-484.6 1251.83,-484.6 1245.83,-478.6 1245.83,-472.6 1245.83,-472.6 1245.83,-433 1245.83,-433 1245.83,-427 1251.83,-421 1257.83,-421 1257.83,-421 1323.67,-421 1323.67,-421 1329.67,-421 1335.67,-427 1335.67,-433 1335.67,-433 1335.67,-472.6 
1335.67,-472.6 1335.67,-478.6 1329.67,-484.6 1323.67,-484.6\"/>\n<text text-anchor=\"start\" x=\"1267.88\" y=\"-461.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_train</text>\n<text text-anchor=\"start\" x=\"1256.62\" y=\"-433.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- fit_random_forest -->\n<g id=\"node11\" class=\"node\">\n<title>fit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1523.9,-402.6C1523.9,-402.6 1410.8,-402.6 1410.8,-402.6 1404.8,-402.6 1398.8,-396.6 1398.8,-390.6 1398.8,-390.6 1398.8,-351 1398.8,-351 1398.8,-345 1404.8,-339 1410.8,-339 1410.8,-339 1523.9,-339 1523.9,-339 1529.9,-339 1535.9,-345 1535.9,-351 1535.9,-351 1535.9,-390.6 1535.9,-390.6 1535.9,-396.6 1529.9,-402.6 1523.9,-402.6\"/>\n<text text-anchor=\"start\" x=\"1409.6\" y=\"-379.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_random_forest</text>\n<text text-anchor=\"start\" x=\"1423.47\" y=\"-351.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- X_train&#45;&gt;fit_random_forest -->\n<g id=\"edge18\" class=\"edge\">\n<title>X_train&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1335.81,-432.11C1351.91,-424.55 1370.63,-415.75 1388.79,-407.23\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1389.94,-410.08 1397.5,-402.66 1386.96,-403.75 1389.94,-410.08\"/>\n</g>\n<!-- X_test -->\n<g id=\"node8\" class=\"node\">\n<title>X_test</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1323.67,-238.6C1323.67,-238.6 1257.83,-238.6 1257.83,-238.6 1251.83,-238.6 1245.83,-232.6 1245.83,-226.6 1245.83,-226.6 1245.83,-187 1245.83,-187 1245.83,-181 1251.83,-175 1257.83,-175 1257.83,-175 1323.67,-175 1323.67,-175 1329.67,-175 1335.67,-181 1335.67,-187 1335.67,-187 1335.67,-226.6 1335.67,-226.6 1335.67,-232.6 1329.67,-238.6 
1323.67,-238.6\"/>\n<text text-anchor=\"start\" x=\"1270.5\" y=\"-215.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_test</text>\n<text text-anchor=\"start\" x=\"1256.62\" y=\"-187.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- train_test_split_func&#45;&gt;X_train -->\n<g id=\"edge9\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;X_train</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1133.62,-362.06C1156.77,-377.63 1185.34,-396.27 1211.7,-411.8 1219.47,-416.38 1227.8,-421.02 1236.02,-425.46\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1234.11,-428.94 1244.58,-430.56 1237.4,-422.76 1234.11,-428.94\"/>\n</g>\n<!-- train_test_split_func&#45;&gt;X_test -->\n<g id=\"edge10\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;X_test</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1133.62,-297.54C1156.77,-281.97 1185.34,-263.33 1211.7,-247.8 1219.47,-243.22 1227.8,-238.58 1236.02,-234.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1237.4,-236.84 1244.58,-229.04 1234.11,-230.66 1237.4,-236.84\"/>\n</g>\n<!-- y_train -->\n<g id=\"node12\" class=\"node\">\n<title>y_train</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1311.67,-402.6C1311.67,-402.6 1269.83,-402.6 1269.83,-402.6 1263.83,-402.6 1257.83,-396.6 1257.83,-390.6 1257.83,-390.6 1257.83,-351 1257.83,-351 1257.83,-345 1263.83,-339 1269.83,-339 1269.83,-339 1311.67,-339 1311.67,-339 1317.67,-339 1323.67,-345 1323.67,-351 1323.67,-351 1323.67,-390.6 1323.67,-390.6 1323.67,-396.6 1317.67,-402.6 1311.67,-402.6\"/>\n<text text-anchor=\"start\" x=\"1268.62\" y=\"-379.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_train</text>\n<text text-anchor=\"start\" x=\"1271.62\" y=\"-351.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func&#45;&gt;y_train -->\n<g id=\"edge20\" 
class=\"edge\">\n<title>train_test_split_func&#45;&gt;y_train</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1163.01,-345.18C1191.38,-350.92 1222.57,-357.22 1246.91,-362.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1246.09,-365.75 1256.58,-364.3 1247.47,-358.89 1246.09,-365.75\"/>\n</g>\n<!-- y_test -->\n<g id=\"node29\" class=\"node\">\n<title>y_test</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1309.05,-320.6C1309.05,-320.6 1272.45,-320.6 1272.45,-320.6 1266.45,-320.6 1260.45,-314.6 1260.45,-308.6 1260.45,-308.6 1260.45,-269 1260.45,-269 1260.45,-263 1266.45,-257 1272.45,-257 1272.45,-257 1309.05,-257 1309.05,-257 1315.05,-257 1321.05,-263 1321.05,-269 1321.05,-269 1321.05,-308.6 1321.05,-308.6 1321.05,-314.6 1315.05,-320.6 1309.05,-320.6\"/>\n<text text-anchor=\"start\" x=\"1271.25\" y=\"-297.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_test</text>\n<text text-anchor=\"start\" x=\"1271.62\" y=\"-269.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func&#45;&gt;y_test -->\n<g id=\"edge46\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;y_test</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1163.01,-314.42C1192.36,-308.49 1224.72,-301.94 1249.4,-296.96\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1250.05,-300.19 1259.16,-294.78 1248.66,-293.33 1250.05,-300.19\"/>\n</g>\n<!-- y_train&#45;&gt;fit_random_forest -->\n<g id=\"edge19\" class=\"edge\">\n<title>y_train&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1323.77,-370.8C1341.71,-370.8 1364.93,-370.8 1387.41,-370.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1387.36,-374.3 1397.36,-370.8 1387.36,-367.3 1387.36,-374.3\"/>\n</g>\n<!-- pclass&#45;&gt;training_set_v1 -->\n<g id=\"edge25\" class=\"edge\">\n<title>pclass&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" 
d=\"M358.14,-102.99C432.64,-79.3 622.89,-32.02 730.5,-124.8 786.56,-173.13 713.12,-231.11 759.5,-288.8 767.4,-298.62 778.04,-306.12 789.44,-311.83\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"787.59,-315.28 798.14,-316.22 790.47,-308.91 787.59,-315.28\"/>\n</g>\n<!-- ticket_t -->\n<g id=\"node15\" class=\"node\">\n<title>ticket_t</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M501.02,-1000.6C501.02,-1000.6 456.17,-1000.6 456.17,-1000.6 450.17,-1000.6 444.17,-994.6 444.17,-988.6 444.17,-988.6 444.17,-949 444.17,-949 444.17,-943 450.17,-937 456.17,-937 456.17,-937 501.02,-937 501.02,-937 507.02,-937 513.02,-943 513.02,-949 513.02,-949 513.02,-988.6 513.02,-988.6 513.02,-994.6 507.02,-1000.6 501.02,-1000.6\"/>\n<text text-anchor=\"start\" x=\"454.97\" y=\"-977.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">ticket_t</text>\n<text text-anchor=\"start\" x=\"459.47\" y=\"-949.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- sex_encoder -->\n<g id=\"node16\" class=\"node\">\n<title>sex_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M519.02,-208.6C519.02,-208.6 438.17,-208.6 438.17,-208.6 432.17,-208.6 426.17,-202.6 426.17,-196.6 426.17,-196.6 426.17,-157 426.17,-157 426.17,-151 432.17,-145 438.17,-145 438.17,-145 519.02,-145 519.02,-145 525.02,-145 531.02,-151 531.02,-157 531.02,-157 531.02,-196.6 531.02,-196.6 531.02,-202.6 525.02,-208.6 519.02,-208.6\"/>\n<text text-anchor=\"start\" x=\"437.35\" y=\"-185.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_encoder</text>\n<text text-anchor=\"start\" x=\"436.97\" y=\"-157.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- sex_encoder&#45;&gt;encoders -->\n<g id=\"edge11\" class=\"edge\">\n<title>sex_encoder&#45;&gt;encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M509.18,-208.96C522.26,-223.25 
537.78,-240.31 551.65,-255.8 564.67,-270.35 565.44,-276.55 580.65,-288.8 588.12,-294.82 596.6,-300.44 605.09,-305.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"603.19,-308.99 613.61,-310.89 606.65,-302.9 603.19,-308.99\"/>\n</g>\n<!-- sex_category -->\n<g id=\"node22\" class=\"node\">\n<title>sex_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M697.5,-197.6C697.5,-197.6 613.65,-197.6 613.65,-197.6 607.65,-197.6 601.65,-191.6 601.65,-185.6 601.65,-185.6 601.65,-146 601.65,-146 601.65,-140 607.65,-134 613.65,-134 613.65,-134 697.5,-134 697.5,-134 703.5,-134 709.5,-140 709.5,-146 709.5,-146 709.5,-185.6 709.5,-185.6 709.5,-191.6 703.5,-197.6 697.5,-197.6\"/>\n<text text-anchor=\"start\" x=\"612.45\" y=\"-174.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_category</text>\n<text text-anchor=\"start\" x=\"636.45\" y=\"-146.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- sex_encoder&#45;&gt;sex_category -->\n<g id=\"edge38\" class=\"edge\">\n<title>sex_encoder&#45;&gt;sex_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M531.15,-173.56C549.69,-172.39 570.84,-171.06 590.33,-169.84\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"590.51,-173.27 600.27,-169.15 590.07,-166.29 590.51,-173.27\"/>\n</g>\n<!-- training_set_v1&#45;&gt;train_test_split_func -->\n<g id=\"edge14\" class=\"edge\">\n<title>training_set_v1&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M920.56,-329.8C944.27,-329.8 972.01,-329.8 997.88,-329.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"997.83,-333.3 1007.83,-329.8 997.83,-326.3 997.83,-333.3\"/>\n</g>\n<!-- ticket&#45;&gt;ticket_t -->\n<g id=\"edge23\" class=\"edge\">\n<title>ticket&#45;&gt;ticket_t</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-968.8C378.14,-968.8 408.16,-968.8 432.87,-968.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"432.81,-972.3 
442.81,-968.8 432.81,-965.3 432.81,-972.3\"/>\n</g>\n<!-- family&#45;&gt;training_set_v1 -->\n<g id=\"edge31\" class=\"edge\">\n<title>family&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M509.52,-620.42C545.28,-608.59 606.36,-586.42 654.57,-558.8\"/>\n</g>\n<!-- cabin&#45;&gt;cabin_t -->\n<g id=\"edge3\" class=\"edge\">\n<title>cabin&#45;&gt;cabin_t</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.13,-417.04C370.78,-428.04 388.92,-441.37 405.55,-452.8 414.91,-459.23 425.12,-465.93 434.82,-472.16\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"432.62,-475.54 442.93,-477.97 436.38,-469.64 432.62,-475.54\"/>\n</g>\n<!-- cabin_encoder -->\n<g id=\"node23\" class=\"node\">\n<title>cabin_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M525.02,-410.6C525.02,-410.6 432.17,-410.6 432.17,-410.6 426.17,-410.6 420.17,-404.6 420.17,-398.6 420.17,-398.6 420.17,-359 420.17,-359 420.17,-353 426.17,-347 432.17,-347 432.17,-347 525.02,-347 525.02,-347 531.02,-347 537.02,-353 537.02,-359 537.02,-359 537.02,-398.6 537.02,-398.6 537.02,-404.6 531.02,-410.6 525.02,-410.6\"/>\n<text text-anchor=\"start\" x=\"430.97\" y=\"-387.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_encoder</text>\n<text text-anchor=\"start\" x=\"436.97\" y=\"-359.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- cabin&#45;&gt;cabin_encoder -->\n<g id=\"edge39\" class=\"edge\">\n<title>cabin&#45;&gt;cabin_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-391.68C371.49,-390.07 390.57,-388.05 409.1,-386.08\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"409.34,-389.47 418.91,-384.93 408.6,-382.51 409.34,-389.47\"/>\n</g>\n<!-- cabin_category -->\n<g id=\"node26\" class=\"node\">\n<title>cabin_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M703.87,-443.6C703.87,-443.6 607.27,-443.6 607.27,-443.6 601.27,-443.6 
595.27,-437.6 595.27,-431.6 595.27,-431.6 595.27,-392 595.27,-392 595.27,-386 601.27,-380 607.27,-380 607.27,-380 703.87,-380 703.87,-380 709.87,-380 715.87,-386 715.87,-392 715.87,-392 715.87,-431.6 715.87,-431.6 715.87,-437.6 709.87,-443.6 703.87,-443.6\"/>\n<text text-anchor=\"start\" x=\"606.07\" y=\"-420.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_category</text>\n<text text-anchor=\"start\" x=\"636.45\" y=\"-392.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- cabin&#45;&gt;cabin_category -->\n<g id=\"edge42\" class=\"edge\">\n<title>cabin&#45;&gt;cabin_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.32,-406.5C370.87,-411.69 388.86,-417.21 405.55,-419.8 465.28,-429.08 534.01,-425.84 584.11,-420.99\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"584.23,-424.39 593.82,-419.9 583.52,-417.43 584.23,-424.39\"/>\n</g>\n<!-- prefit_random_forest -->\n<g id=\"node21\" class=\"node\">\n<title>prefit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M1357.8,-156.6C1357.8,-156.6 1223.7,-156.6 1223.7,-156.6 1217.7,-156.6 1211.7,-150.6 1211.7,-144.6 1211.7,-144.6 1211.7,-105 1211.7,-105 1211.7,-99 1217.7,-93 1223.7,-93 1223.7,-93 1357.8,-93 1357.8,-93 1363.8,-93 1369.8,-99 1369.8,-105 1369.8,-105 1369.8,-144.6 1369.8,-144.6 1369.8,-150.6 1363.8,-156.6 1357.8,-156.6\"/>\n<text text-anchor=\"start\" x=\"1222.5\" y=\"-133.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">prefit_random_forest</text>\n<text text-anchor=\"start\" x=\"1246.88\" y=\"-105.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- prefit_random_forest&#45;&gt;fit_random_forest -->\n<g id=\"edge17\" class=\"edge\">\n<title>prefit_random_forest&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1360.15,-157.07C1363.59,-159.8 1366.84,-162.7 
1369.8,-165.8 1414.38,-212.36 1441.45,-283.31 1455.16,-328.35\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1452.01,-329.05 1458.2,-337.65 1458.72,-327.07 1452.01,-329.05\"/>\n</g>\n<!-- sex_category&#45;&gt;training_set_v1 -->\n<g id=\"edge29\" class=\"edge\">\n<title>sex_category&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M709.84,-189.03C717.49,-194.06 724.72,-199.97 730.5,-206.8 755.47,-236.31 732.88,-260.77 759.5,-288.8 767.84,-297.59 778.35,-304.55 789.38,-310.06\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"787.62,-313.56 798.17,-314.53 790.53,-307.19 787.62,-313.56\"/>\n</g>\n<!-- cabin_encoder&#45;&gt;encoders -->\n<g id=\"edge12\" class=\"edge\">\n<title>cabin_encoder&#45;&gt;encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M537.38,-362.62C558.82,-356.61 582.97,-349.85 603.75,-344.03\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"604.59,-347.15 613.28,-341.08 602.71,-340.41 604.59,-347.15\"/>\n</g>\n<!-- cabin_encoder&#45;&gt;cabin_category -->\n<g id=\"edge43\" class=\"edge\">\n<title>cabin_encoder&#45;&gt;cabin_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M537.38,-389.7C552.42,-392.53 568.8,-395.62 584.4,-398.57\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"583.58,-402.16 594.06,-400.58 584.88,-395.28 583.58,-402.16\"/>\n</g>\n<!-- fare&#45;&gt;training_set_v1 -->\n<g id=\"edge27\" class=\"edge\">\n<title>fare&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.3,-556.54C386.76,-554.72 435.74,-553.27 477.6,-558.8\"/>\n</g>\n<!-- cabin_category&#45;&gt;training_set_v1 -->\n<g id=\"edge28\" class=\"edge\">\n<title>cabin_category&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M716.16,-387.64C739.09,-378.34 765.48,-367.64 789.28,-357.99\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"790.34,-360.93 798.3,-353.93 787.71,-354.44 790.34,-360.93\"/>\n</g>\n<!-- embarked&#45;&gt;embarked_category -->\n<g 
id=\"edge1\" class=\"edge\">\n<title>embarked&#45;&gt;embarked_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M369.21,-270.37C380.6,-264.49 393.2,-259.04 405.55,-255.8 458.84,-241.82 520.75,-239.77 569.5,-241.24\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"569.21,-244.77 579.33,-241.63 569.46,-237.77 569.21,-244.77\"/>\n</g>\n<!-- embarked&#45;&gt;embarked_encoder -->\n<g id=\"edge7\" class=\"edge\">\n<title>embarked&#45;&gt;embarked_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M369.35,-296.8C377.18,-296.8 385.6,-296.8 394.17,-296.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"394.06,-300.3 404.06,-296.8 394.06,-293.3 394.06,-300.3\"/>\n</g>\n<!-- age&#45;&gt;training_set_v1 -->\n<g id=\"edge26\" class=\"edge\">\n<title>age&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.03,-500.55C377.21,-517.8 402.68,-538.35 405.55,-539.8 435.12,-554.71 444.77,-554.47 477.6,-558.8\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M479.6,-558.8C556.7,-568.98 587.1,-597.46 654.57,-558.8\"/>\n<path fill=\"none\" stroke=\"black\" d=\"M656.57,-558.8C736.68,-512.91 801.34,-423.57 834.52,-371.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"837.84,-373.37 840.16,-363.03 831.9,-369.66 837.84,-373.37\"/>\n</g>\n<!-- sex&#45;&gt;sex_encoder -->\n<g id=\"edge24\" class=\"edge\">\n<title>sex&#45;&gt;sex_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-192.09C373.08,-189.99 394.64,-187.27 414.93,-184.71\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"415.23,-188.07 424.71,-183.35 414.35,-181.13 415.23,-188.07\"/>\n</g>\n<!-- sex&#45;&gt;sex_category -->\n<g id=\"edge37\" class=\"edge\">\n<title>sex&#45;&gt;sex_category</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.42,-171.86C376.24,-156 399.78,-137.75 405.55,-135.8 466.33,-115.24 539.64,-128.29 590.93,-143.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"589.62,-146.63 600.2,-146.13 591.62,-139.92 
589.62,-146.63\"/>\n</g>\n<!-- sibsp&#45;&gt;family -->\n<g id=\"edge33\" class=\"edge\">\n<title>sibsp&#45;&gt;family</title>\n<path fill=\"none\" stroke=\"black\" d=\"M356.39,-704.65C379.8,-690.19 412.77,-669.83 438.42,-653.99\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"439.84,-656.62 446.51,-648.38 436.16,-650.66 439.84,-656.62\"/>\n</g>\n<!-- _target_inputs -->\n<g id=\"node33\" class=\"node\">\n<title>_target_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"246.45,-1073.1 136.6,-1073.1 136.6,-1028.5 246.45,-1028.5 246.45,-1073.1\"/>\n<text text-anchor=\"start\" x=\"151.4\" y=\"-1045\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">target_col</text>\n<text text-anchor=\"start\" x=\"216.65\" y=\"-1045\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _target_inputs&#45;&gt;target -->\n<g id=\"edge5\" class=\"edge\">\n<title>_target_inputs&#45;&gt;target</title>\n<path fill=\"none\" stroke=\"black\" d=\"M246.79,-1050.8C259.46,-1050.8 272.74,-1050.8 284.68,-1050.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"284.65,-1054.3 294.65,-1050.8 284.65,-1047.3 284.65,-1054.3\"/>\n</g>\n<!-- _titanic_data_inputs -->\n<g id=\"node34\" class=\"node\">\n<title>_titanic_data_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"107.6,-591.6 0,-591.6 0,-526 107.6,-526 107.6,-591.6\"/>\n<text text-anchor=\"start\" x=\"20.05\" y=\"-563.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">location</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-563.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n<text text-anchor=\"start\" x=\"14.8\" y=\"-542.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">index_col</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-542.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _titanic_data_inputs&#45;&gt;titanic_data -->\n<g 
id=\"edge6\" class=\"edge\">\n<title>_titanic_data_inputs&#45;&gt;titanic_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M108.05,-558.8C116.02,-558.8 124.26,-558.8 132.33,-558.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"132.16,-562.3 142.16,-558.8 132.16,-555.3 132.16,-562.3\"/>\n</g>\n<!-- _train_test_split_func_inputs -->\n<g id=\"node35\" class=\"node\">\n<title>_train_test_split_func_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"960.1,-279.6 759.5,-279.6 759.5,-214 960.1,-214 960.1,-279.6\"/>\n<text text-anchor=\"start\" x=\"802.67\" y=\"-251.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"925.67\" y=\"-251.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n<text text-anchor=\"start\" x=\"774.17\" y=\"-230.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">validation_size_fraction</text>\n<text text-anchor=\"start\" x=\"920.05\" y=\"-230.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">float</text>\n</g>\n<!-- _train_test_split_func_inputs&#45;&gt;train_test_split_func -->\n<g id=\"edge16\" class=\"edge\">\n<title>_train_test_split_func_inputs&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M950.66,-280.08C966.44,-285.93 982.84,-292 998.52,-297.81\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"997.12,-301.39 1007.71,-301.58 999.55,-294.83 997.12,-301.39\"/>\n</g>\n<!-- _prefit_random_forest_inputs -->\n<g id=\"node36\" class=\"node\">\n<title>_prefit_random_forest_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"1182.7,-157.6 989.1,-157.6 989.1,-92 1182.7,-92 1182.7,-157.6\"/>\n<text text-anchor=\"start\" x=\"1003.77\" y=\"-129.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"1123.27\" y=\"-129.5\" font-family=\"Helvetica,sans-Serif\" 
font-size=\"14.00\">int</text>\n<text text-anchor=\"start\" x=\"1011.65\" y=\"-108.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">max_depth</text>\n<text text-anchor=\"start\" x=\"1092.9\" y=\"-108.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">typing.Union</text>\n</g>\n<!-- _prefit_random_forest_inputs&#45;&gt;prefit_random_forest -->\n<g id=\"edge36\" class=\"edge\">\n<title>_prefit_random_forest_inputs&#45;&gt;prefit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1182.98,-124.8C1188.93,-124.8 1194.91,-124.8 1200.83,-124.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1200.4,-128.3 1210.4,-124.8 1200.4,-121.3 1200.4,-128.3\"/>\n</g>\n<!-- input -->\n<g id=\"node37\" class=\"node\">\n<title>input</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"80.8,-701.1 26.8,-701.1 26.8,-664.5 80.8,-664.5 80.8,-701.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-677\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node38\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M76.22,-646.1C76.22,-646.1 31.37,-646.1 31.37,-646.1 25.37,-646.1 19.37,-640.1 19.37,-634.1 19.37,-634.1 19.37,-621.5 19.37,-621.5 19.37,-615.5 25.37,-609.5 31.37,-609.5 31.37,-609.5 76.22,-609.5 76.22,-609.5 82.22,-609.5 88.22,-615.5 88.22,-621.5 88.22,-621.5 88.22,-634.1 88.22,-634.1 88.22,-640.1 82.22,-646.1 76.22,-646.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-622\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": "<graphviz.graphs.Digraph at 0x13f057fa0>"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dr.display_all_functions() # pass in a path if you want to save the image."
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:21.143368Z",
"start_time": "2023-11-07T23:50:20.003Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"# Visualize how something is built: what sequence of operations produced this data/model?\n",
"E.g. how do the feature encoders get computed and what flows into them?"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"642pt\" height=\"358pt\"\n viewBox=\"0.00 0.00 642.00 357.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 353.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-353.8 638,-353.8 638,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"11.38,-156.8 11.38,-341.8 96.22,-341.8 96.22,-156.8 11.38,-156.8\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-324.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- embarked -->\n<g id=\"node1\" class=\"node\">\n<title>embarked</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M336.3,-227.6C336.3,-227.6 274.2,-227.6 274.2,-227.6 268.2,-227.6 262.2,-221.6 262.2,-215.6 262.2,-215.6 262.2,-176 262.2,-176 262.2,-170 268.2,-164 274.2,-164 274.2,-164 336.3,-164 336.3,-164 342.3,-164 348.3,-170 348.3,-176 348.3,-176 348.3,-215.6 348.3,-215.6 348.3,-221.6 342.3,-227.6 336.3,-227.6\"/>\n<text text-anchor=\"start\" x=\"273\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked</text>\n<text text-anchor=\"start\" x=\"286.12\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- embarked_encoder -->\n<g id=\"node3\" class=\"node\">\n<title>embarked_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M511.4,-227.6C511.4,-227.6 389.3,-227.6 389.3,-227.6 383.3,-227.6 377.3,-221.6 377.3,-215.6 377.3,-215.6 377.3,-176 377.3,-176 377.3,-170 383.3,-164 389.3,-164 389.3,-164 511.4,-164 511.4,-164 517.4,-164 
523.4,-170 523.4,-176 523.4,-176 523.4,-215.6 523.4,-215.6 523.4,-221.6 517.4,-227.6 511.4,-227.6\"/>\n<text text-anchor=\"start\" x=\"388.1\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_encoder</text>\n<text text-anchor=\"start\" x=\"408.72\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- embarked&#45;&gt;embarked_encoder -->\n<g id=\"edge3\" class=\"edge\">\n<title>embarked&#45;&gt;embarked_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M348.8,-195.8C354.36,-195.8 360.2,-195.8 366.15,-195.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"365.95,-199.3 375.95,-195.8 365.95,-192.3 365.95,-199.3\"/>\n</g>\n<!-- titanic_data -->\n<g id=\"node2\" class=\"node\">\n<title>titanic_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M221.2,-145.6C221.2,-145.6 148.6,-145.6 148.6,-145.6 142.6,-145.6 136.6,-139.6 136.6,-133.6 136.6,-133.6 136.6,-94 136.6,-94 136.6,-88 142.6,-82 148.6,-82 148.6,-82 221.2,-82 221.2,-82 227.2,-82 233.2,-88 233.2,-94 233.2,-94 233.2,-133.6 233.2,-133.6 233.2,-139.6 227.2,-145.6 221.2,-145.6\"/>\n<text text-anchor=\"start\" x=\"147.4\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">titanic_data</text>\n<text text-anchor=\"start\" x=\"150.77\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- titanic_data&#45;&gt;embarked -->\n<g id=\"edge1\" class=\"edge\">\n<title>titanic_data&#45;&gt;embarked</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.34,-145.97C239.17,-150.71 246.23,-155.6 253.15,-160.4\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"250.59,-163.57 260.8,-166.39 254.58,-157.82 250.59,-163.57\"/>\n</g>\n<!-- cabin -->\n<g id=\"node5\" class=\"node\">\n<title>cabin</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M323.17,-145.6C323.17,-145.6 287.32,-145.6 
287.32,-145.6 281.32,-145.6 275.32,-139.6 275.32,-133.6 275.32,-133.6 275.32,-94 275.32,-94 275.32,-88 281.32,-82 287.32,-82 287.32,-82 323.17,-82 323.17,-82 329.17,-82 335.17,-88 335.17,-94 335.17,-94 335.17,-133.6 335.17,-133.6 335.17,-139.6 329.17,-145.6 323.17,-145.6\"/>\n<text text-anchor=\"start\" x=\"287.62\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin</text>\n<text text-anchor=\"start\" x=\"286.12\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;cabin -->\n<g id=\"edge5\" class=\"edge\">\n<title>titanic_data&#45;&gt;cabin</title>\n<path fill=\"none\" stroke=\"black\" d=\"M233.7,-113.8C243.86,-113.8 254.48,-113.8 264.32,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"264.01,-117.3 274.01,-113.8 264.01,-110.3 264.01,-117.3\"/>\n</g>\n<!-- sex -->\n<g id=\"node6\" class=\"node\">\n<title>sex</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M323.17,-63.6C323.17,-63.6 287.32,-63.6 287.32,-63.6 281.32,-63.6 275.32,-57.6 275.32,-51.6 275.32,-51.6 275.32,-12 275.32,-12 275.32,-6 281.32,0 287.32,0 287.32,0 323.17,0 323.17,0 329.17,0 335.17,-6 335.17,-12 335.17,-12 335.17,-51.6 335.17,-51.6 335.17,-57.6 329.17,-63.6 323.17,-63.6\"/>\n<text text-anchor=\"start\" x=\"294\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex</text>\n<text text-anchor=\"start\" x=\"286.12\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;sex -->\n<g id=\"edge6\" class=\"edge\">\n<title>titanic_data&#45;&gt;sex</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.34,-81.63C243.57,-73.84 255.44,-65.62 266.23,-58.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"267.78,-60.63 274.01,-52.06 263.79,-54.88 267.78,-60.63\"/>\n</g>\n<!-- encoders -->\n<g id=\"node8\" 
class=\"node\">\n<title>encoders</title>\n<path fill=\"#ffc857\" stroke=\"black\" d=\"M622,-145.6C622,-145.6 564.4,-145.6 564.4,-145.6 558.4,-145.6 552.4,-139.6 552.4,-133.6 552.4,-133.6 552.4,-94 552.4,-94 552.4,-88 558.4,-82 564.4,-82 564.4,-82 622,-82 622,-82 628,-82 634,-88 634,-94 634,-94 634,-133.6 634,-133.6 634,-139.6 628,-145.6 622,-145.6\"/>\n<text text-anchor=\"start\" x=\"563.2\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">encoders</text>\n<text text-anchor=\"start\" x=\"582.7\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- embarked_encoder&#45;&gt;encoders -->\n<g id=\"edge10\" class=\"edge\">\n<title>embarked_encoder&#45;&gt;encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M508.05,-163.68C513.25,-160.69 518.43,-157.7 523.4,-154.8 529.7,-151.13 536.3,-147.25 542.82,-143.39\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"544.45,-145.9 551.26,-137.78 540.88,-139.88 544.45,-145.9\"/>\n</g>\n<!-- cabin_encoder -->\n<g id=\"node4\" class=\"node\">\n<title>cabin_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M496.77,-145.6C496.77,-145.6 403.92,-145.6 403.92,-145.6 397.92,-145.6 391.92,-139.6 391.92,-133.6 391.92,-133.6 391.92,-94 391.92,-94 391.92,-88 397.92,-82 403.92,-82 403.92,-82 496.77,-82 496.77,-82 502.77,-82 508.77,-88 508.77,-94 508.77,-94 508.77,-133.6 508.77,-133.6 508.77,-139.6 502.77,-145.6 496.77,-145.6\"/>\n<text text-anchor=\"start\" x=\"402.72\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_encoder</text>\n<text text-anchor=\"start\" x=\"408.72\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- cabin_encoder&#45;&gt;encoders -->\n<g id=\"edge9\" class=\"edge\">\n<title>cabin_encoder&#45;&gt;encoders</title>\n<path fill=\"none\" stroke=\"black\" 
d=\"M509.05,-113.8C519.65,-113.8 530.64,-113.8 541.04,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"541.03,-117.3 551.03,-113.8 541.03,-110.3 541.03,-117.3\"/>\n</g>\n<!-- cabin&#45;&gt;cabin_encoder -->\n<g id=\"edge4\" class=\"edge\">\n<title>cabin&#45;&gt;cabin_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M335.55,-113.8C348.71,-113.8 364.83,-113.8 380.76,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"380.71,-117.3 390.71,-113.8 380.71,-110.3 380.71,-117.3\"/>\n</g>\n<!-- sex_encoder -->\n<g id=\"node7\" class=\"node\">\n<title>sex_encoder</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M490.77,-63.6C490.77,-63.6 409.92,-63.6 409.92,-63.6 403.92,-63.6 397.92,-57.6 397.92,-51.6 397.92,-51.6 397.92,-12 397.92,-12 397.92,-6 403.92,0 409.92,0 409.92,0 490.77,0 490.77,0 496.77,0 502.77,-6 502.77,-12 502.77,-12 502.77,-51.6 502.77,-51.6 502.77,-57.6 496.77,-63.6 490.77,-63.6\"/>\n<text text-anchor=\"start\" x=\"409.1\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_encoder</text>\n<text text-anchor=\"start\" x=\"408.72\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">LabelEncoder</text>\n</g>\n<!-- sex&#45;&gt;sex_encoder -->\n<g id=\"edge7\" class=\"edge\">\n<title>sex&#45;&gt;sex_encoder</title>\n<path fill=\"none\" stroke=\"black\" d=\"M335.55,-31.8C350.32,-31.8 368.82,-31.8 386.58,-31.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"386.51,-35.3 396.51,-31.8 386.51,-28.3 386.51,-35.3\"/>\n</g>\n<!-- sex_encoder&#45;&gt;encoders -->\n<g id=\"edge8\" class=\"edge\">\n<title>sex_encoder&#45;&gt;encoders</title>\n<path fill=\"none\" stroke=\"black\" d=\"M502.98,-61.01C509.86,-64.95 516.81,-68.96 523.4,-72.8 529.7,-76.47 536.3,-80.35 542.82,-84.21\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"540.88,-87.72 551.26,-89.82 544.45,-81.7 540.88,-87.72\"/>\n</g>\n<!-- _titanic_data_inputs -->\n<g id=\"node9\" 
class=\"node\">\n<title>_titanic_data_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"107.6,-146.6 0,-146.6 0,-81 107.6,-81 107.6,-146.6\"/>\n<text text-anchor=\"start\" x=\"20.05\" y=\"-118.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">location</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-118.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n<text text-anchor=\"start\" x=\"14.8\" y=\"-97.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">index_col</text>\n<text text-anchor=\"start\" x=\"77.8\" y=\"-97.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">str</text>\n</g>\n<!-- _titanic_data_inputs&#45;&gt;titanic_data -->\n<g id=\"edge2\" class=\"edge\">\n<title>_titanic_data_inputs&#45;&gt;titanic_data</title>\n<path fill=\"none\" stroke=\"black\" d=\"M108.06,-113.8C113.73,-113.8 119.52,-113.8 125.25,-113.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"125.14,-117.3 135.14,-113.8 125.14,-110.3 125.14,-117.3\"/>\n</g>\n<!-- input -->\n<g id=\"node10\" class=\"node\">\n<title>input</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"80.8,-311.1 26.8,-311.1 26.8,-274.5 80.8,-274.5 80.8,-311.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-287\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node11\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M76.22,-256.1C76.22,-256.1 31.37,-256.1 31.37,-256.1 25.37,-256.1 19.37,-250.1 19.37,-244.1 19.37,-244.1 19.37,-231.5 19.37,-231.5 19.37,-225.5 25.37,-219.5 31.37,-219.5 31.37,-219.5 76.22,-219.5 76.22,-219.5 82.22,-219.5 88.22,-225.5 88.22,-231.5 88.22,-231.5 88.22,-244.1 88.22,-244.1 88.22,-250.1 82.22,-256.1 76.22,-256.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-232\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n<!-- output -->\n<g 
id=\"node12\" class=\"node\">\n<title>output</title>\n<path fill=\"#ffc857\" stroke=\"black\" d=\"M71.35,-201.1C71.35,-201.1 36.25,-201.1 36.25,-201.1 30.25,-201.1 24.25,-195.1 24.25,-189.1 24.25,-189.1 24.25,-176.5 24.25,-176.5 24.25,-170.5 30.25,-164.5 36.25,-164.5 36.25,-164.5 71.35,-164.5 71.35,-164.5 77.35,-164.5 83.35,-170.5 83.35,-176.5 83.35,-176.5 83.35,-189.1 83.35,-189.1 83.35,-195.1 77.35,-201.1 71.35,-201.1\"/>\n<text text-anchor=\"middle\" x=\"53.8\" y=\"-177\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">output</text>\n</g>\n</g>\n</svg>\n",
"text/plain": "<graphviz.graphs.Digraph at 0x13f066ca0>"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"inputs = {\n",
" \"location\": \"data/train.csv\",\n",
" \"index_col\": \"passengerid\",\n",
" \"target_col\": \"survived\",\n",
" \"random_state\": 42,\n",
" \"max_depth\": None,\n",
" \"validation_size_fraction\": 0.33,\n",
"}\n",
"dr.visualize_execution(\n",
" [features.encoders], None, {}, inputs=inputs # pass in a path if you want to save the image.\n",
")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:21.680500Z",
"start_time": "2023-11-07T23:50:21.012804Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"# Understand the upstream lineage of a particular output: Whose/What data sources led to this artifact/model?\n",
"E.g. There is something funky with the Random Forest model and we want to double-check\n",
"for the current production model, what the data sources are and who owns them,\n",
"so we can go ping them"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "[{'team': 'data-engineering',\n 'function': 'titanic_data',\n 'source': 'prod.titantic'}]"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"upstream_nodes = dr.what_is_upstream_of(\"fit_random_forest\")\n",
"teams = []\n",
"# iterate through\n",
"for node in upstream_nodes:\n",
" # filter to nodes that we're interested in getting information about\n",
" if node.tags.get(\"source\"):\n",
" # append for output\n",
" teams.append({\n",
" \"team\": node.tags.get(\"owner\"),\n",
" \"function\": node.name,\n",
" \"source\": node.tags.get(\"source\"),\n",
" })\n",
"teams"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:21.684005Z",
"start_time": "2023-11-07T23:50:21.612361Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"# Understand the downstream lineage of a particular output: Who/What is downstream of this transform?\n",
"E.g. Say we're on data engineering and want to change the source data. How could we determine\n",
"what the artifacts that use this data are and who owns them?"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"data": {
"text/plain": "[{'team': 'data-science',\n 'function': 'fit_random_forest',\n 'artifact': 'model'},\n {'team': 'data-science',\n 'function': 'training_set_v1',\n 'artifact': 'training_set'},\n {'team': 'data-science', 'function': 'encoders', 'artifact': 'encoders'}]"
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"downstream_nodes = dr.what_is_downstream_of(\"titanic_data\")\n",
"artifacts = []\n",
"for node in downstream_nodes:\n",
" # if it's an artifact function\n",
" if node.tags.get(\"artifact\"):\n",
" # pull out the information we want\n",
" artifacts.append({\n",
" \"team\": node.tags.get(\"owner\"),\n",
" \"function\": node.name,\n",
" \"artifact\": node.tags.get(\"artifact\"),\n",
" })\n",
"artifacts"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:21.735886Z",
"start_time": "2023-11-07T23:50:21.625900Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"# More advanced queries: What is defined as PII data, and what does it end up in?\n",
"E.g. Let's say our compliance team has come to us to understand how we're using PII data,\n",
"i.e. which artifacts it ends up in. They want this report every month."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"data": {
"text/plain": "{'sex': [{'team': 'data-science',\n 'function': 'training_set_v1',\n 'artifact': 'training_set'},\n {'team': 'data-science',\n 'function': 'fit_random_forest',\n 'artifact': 'model'},\n {'team': 'data-science', 'function': 'encoders', 'artifact': 'encoders'}],\n 'age': [{'team': 'data-science',\n 'function': 'training_set_v1',\n 'artifact': 'training_set'},\n {'team': 'data-science',\n 'function': 'fit_random_forest',\n 'artifact': 'model'}]}"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pii_nodes = [n for n in dr.list_available_variables()\n",
" if n.tags.get(\"PII\") == \"true\"]\n",
"pii_to_artifacts = {}\n",
"# loop through each PII node\n",
"for node in pii_nodes:\n",
" pii_to_artifacts[node.name] = []\n",
" # ask what is downstream\n",
" downstream_nodes = dr.what_is_downstream_of(node.name)\n",
" for dwn_node in downstream_nodes:\n",
" # Filter to nodes of interest\n",
" if dwn_node.tags.get(\"artifact\"):\n",
" # pull out information\n",
" pii_to_artifacts[node.name].append({\n",
" \"team\": dwn_node.tags.get(\"owner\"),\n",
" \"function\": dwn_node.name,\n",
" \"artifact\": dwn_node.tags.get(\"artifact\"),\n",
" })\n",
"pii_to_artifacts"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:21.871125Z",
"start_time": "2023-11-07T23:50:21.704409Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"# Understanding how two things connect more deeply: What is in between this data source and this model?\n",
"E.g. say we're new to the team and want to understand how the data flows from the data source to\n",
"the model. We can use the `visualize_path_between` function to draw the lineage between two nodes, and the `what_is_the_path_between` function to query it in code."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 14,
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"1051pt\" height=\"646pt\"\n viewBox=\"0.00 0.00 1050.98 645.60\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 641.6)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-641.6 1046.98,-641.6 1046.98,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"8,-442.8 8,-572.8 92.85,-572.8 92.85,-442.8 8,-442.8\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-555.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- embarked_category -->\n<g id=\"node1\" class=\"node\">\n<title>embarked_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M265.58,-555.6C265.58,-555.6 139.73,-555.6 139.73,-555.6 133.73,-555.6 127.73,-549.6 127.73,-543.6 127.73,-543.6 127.73,-504 127.73,-504 127.73,-498 133.73,-492 139.73,-492 139.73,-492 265.58,-492 265.58,-492 271.58,-492 277.58,-498 277.58,-504 277.58,-504 277.58,-543.6 277.58,-543.6 277.58,-549.6 271.58,-555.6 265.58,-555.6\"/>\n<text text-anchor=\"start\" x=\"138.53\" y=\"-532.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">embarked_category</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-504.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- training_set_v1 -->\n<g id=\"node3\" class=\"node\">\n<title>training_set_v1</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M455.18,-350.6C455.18,-350.6 358.58,-350.6 358.58,-350.6 352.58,-350.6 346.58,-344.6 346.58,-338.6 346.58,-338.6 346.58,-299 346.58,-299 346.58,-293 
352.58,-287 358.58,-287 358.58,-287 455.18,-287 455.18,-287 461.18,-287 467.18,-293 467.18,-299 467.18,-299 467.18,-338.6 467.18,-338.6 467.18,-344.6 461.18,-350.6 455.18,-350.6\"/>\n<text text-anchor=\"start\" x=\"357.38\" y=\"-327.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_set_v1</text>\n<text text-anchor=\"start\" x=\"372.75\" y=\"-299.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- embarked_category&#45;&gt;training_set_v1 -->\n<g id=\"edge9\" class=\"edge\">\n<title>embarked_category&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M266.19,-491.57C270.17,-488.78 274,-485.85 277.58,-482.8 319.47,-447.03 356.95,-395.74 380.42,-360.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"383.86,-362.2 386.37,-351.91 377.99,-358.39 383.86,-362.2\"/>\n</g>\n<!-- train_test_split_func -->\n<g id=\"node2\" class=\"node\">\n<title>train_test_split_func</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M677.78,-350.6C677.78,-350.6 548.18,-350.6 548.18,-350.6 542.18,-350.6 536.18,-344.6 536.18,-338.6 536.18,-338.6 536.18,-299 536.18,-299 536.18,-293 542.18,-287 548.18,-287 548.18,-287 677.78,-287 677.78,-287 683.78,-287 689.78,-293 689.78,-299 689.78,-299 689.78,-338.6 689.78,-338.6 689.78,-344.6 683.78,-350.6 677.78,-350.6\"/>\n<text text-anchor=\"start\" x=\"546.98\" y=\"-327.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">train_test_split_func</text>\n<text text-anchor=\"start\" x=\"602.48\" y=\"-299.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- X_train -->\n<g id=\"node10\" class=\"node\">\n<title>X_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M830.75,-391.6C830.75,-391.6 764.9,-391.6 764.9,-391.6 758.9,-391.6 752.9,-385.6 752.9,-379.6 752.9,-379.6 752.9,-340 752.9,-340 752.9,-334 758.9,-328 764.9,-328 764.9,-328 830.75,-328 
830.75,-328 836.75,-328 842.75,-334 842.75,-340 842.75,-340 842.75,-379.6 842.75,-379.6 842.75,-385.6 836.75,-391.6 830.75,-391.6\"/>\n<text text-anchor=\"start\" x=\"774.95\" y=\"-368.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_train</text>\n<text text-anchor=\"start\" x=\"763.7\" y=\"-340.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- train_test_split_func&#45;&gt;X_train -->\n<g id=\"edge16\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;X_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M690.2,-335.89C707.64,-339.8 725.84,-343.88 742.17,-347.54\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"741.08,-351.11 751.6,-349.88 742.61,-344.28 741.08,-351.11\"/>\n</g>\n<!-- y_train -->\n<g id=\"node11\" class=\"node\">\n<title>y_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M818.75,-309.6C818.75,-309.6 776.9,-309.6 776.9,-309.6 770.9,-309.6 764.9,-303.6 764.9,-297.6 764.9,-297.6 764.9,-258 764.9,-258 764.9,-252 770.9,-246 776.9,-246 776.9,-246 818.75,-246 818.75,-246 824.75,-246 830.75,-252 830.75,-258 830.75,-258 830.75,-297.6 830.75,-297.6 830.75,-303.6 824.75,-309.6 818.75,-309.6\"/>\n<text text-anchor=\"start\" x=\"775.7\" y=\"-286.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_train</text>\n<text text-anchor=\"start\" x=\"778.7\" y=\"-258.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func&#45;&gt;y_train -->\n<g id=\"edge17\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;y_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M690.2,-301.71C711.99,-296.82 734.96,-291.67 753.98,-287.41\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"754.69,-290.61 763.68,-285.01 753.16,-283.78 754.69,-290.61\"/>\n</g>\n<!-- training_set_v1&#45;&gt;train_test_split_func -->\n<g id=\"edge1\" 
class=\"edge\">\n<title>training_set_v1&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"red\" d=\"M467.46,-318.8C485.44,-318.8 505.57,-318.8 524.96,-318.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"524.95,-322.3 534.95,-318.8 524.95,-315.3 524.95,-322.3\"/>\n</g>\n<!-- cabin_category -->\n<g id=\"node4\" class=\"node\">\n<title>cabin_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M250.95,-227.6C250.95,-227.6 154.35,-227.6 154.35,-227.6 148.35,-227.6 142.35,-221.6 142.35,-215.6 142.35,-215.6 142.35,-176 142.35,-176 142.35,-170 148.35,-164 154.35,-164 154.35,-164 250.95,-164 250.95,-164 256.95,-164 262.95,-170 262.95,-176 262.95,-176 262.95,-215.6 262.95,-215.6 262.95,-221.6 256.95,-227.6 250.95,-227.6\"/>\n<text text-anchor=\"start\" x=\"153.15\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">cabin_category</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- cabin_category&#45;&gt;training_set_v1 -->\n<g id=\"edge7\" class=\"edge\">\n<title>cabin_category&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M263.39,-225.74C268.39,-229.19 273.2,-232.88 277.58,-236.8 294.2,-251.7 289.13,-263.88 306.58,-277.8 315.51,-284.92 325.85,-290.99 336.41,-296.09\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.59,-299.56 345.15,-300.5 337.49,-293.18 334.59,-299.56\"/>\n</g>\n<!-- sex_category -->\n<g id=\"node5\" class=\"node\">\n<title>sex_category</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M244.58,-145.6C244.58,-145.6 160.73,-145.6 160.73,-145.6 154.73,-145.6 148.73,-139.6 148.73,-133.6 148.73,-133.6 148.73,-94 148.73,-94 148.73,-88 154.73,-82 160.73,-82 160.73,-82 244.58,-82 244.58,-82 250.58,-82 256.58,-88 256.58,-94 256.58,-94 256.58,-133.6 256.58,-133.6 256.58,-139.6 250.58,-145.6 244.58,-145.6\"/>\n<text text-anchor=\"start\" 
x=\"159.53\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">sex_category</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- sex_category&#45;&gt;training_set_v1 -->\n<g id=\"edge8\" class=\"edge\">\n<title>sex_category&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M256.83,-136C264.68,-141.2 271.99,-147.43 277.58,-154.8 311.48,-199.58 270.16,-235.04 306.58,-277.8 314.57,-287.19 325.06,-294.47 336.23,-300.09\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.61,-303.66 345.16,-304.62 337.52,-297.29 334.61,-303.66\"/>\n</g>\n<!-- target -->\n<g id=\"node6\" class=\"node\">\n<title>target</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M220.58,-637.6C220.58,-637.6 184.73,-637.6 184.73,-637.6 178.73,-637.6 172.73,-631.6 172.73,-625.6 172.73,-625.6 172.73,-586 172.73,-586 172.73,-580 178.73,-574 184.73,-574 184.73,-574 220.58,-574 220.58,-574 226.58,-574 232.58,-580 232.58,-586 232.58,-586 232.58,-625.6 232.58,-625.6 232.58,-631.6 226.58,-637.6 220.58,-637.6\"/>\n<text text-anchor=\"start\" x=\"183.9\" y=\"-614.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">target</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-586.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- target&#45;&gt;train_test_split_func -->\n<g id=\"edge2\" class=\"edge\">\n<title>target&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.94,-590.67C246.72,-583.27 263.25,-573.99 277.58,-564.8 383.1,-497.12 499.6,-407.84 563.45,-357.56\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"565.05,-359.97 570.73,-351.03 560.71,-354.48 565.05,-359.97\"/>\n</g>\n<!-- fit_random_forest -->\n<g id=\"node7\" class=\"node\">\n<title>fit_random_forest</title>\n<path fill=\"#b4d8e4\" 
stroke=\"red\" d=\"M1030.98,-309.6C1030.98,-309.6 917.88,-309.6 917.88,-309.6 911.88,-309.6 905.88,-303.6 905.88,-297.6 905.88,-297.6 905.88,-258 905.88,-258 905.88,-252 911.88,-246 917.88,-246 917.88,-246 1030.98,-246 1030.98,-246 1036.98,-246 1042.98,-252 1042.98,-258 1042.98,-258 1042.98,-297.6 1042.98,-297.6 1042.98,-303.6 1036.98,-309.6 1030.98,-309.6\"/>\n<text text-anchor=\"start\" x=\"916.68\" y=\"-286.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_random_forest</text>\n<text text-anchor=\"start\" x=\"930.55\" y=\"-258.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- titanic_data -->\n<g id=\"node8\" class=\"node\">\n<title>titanic_data</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M86.73,-432.6C86.73,-432.6 14.13,-432.6 14.13,-432.6 8.13,-432.6 2.13,-426.6 2.13,-420.6 2.13,-420.6 2.13,-381 2.13,-381 2.13,-375 8.13,-369 14.13,-369 14.13,-369 86.73,-369 86.73,-369 92.73,-369 98.73,-375 98.73,-381 98.73,-381 98.73,-420.6 98.73,-420.6 98.73,-426.6 92.73,-432.6 86.73,-432.6\"/>\n<text text-anchor=\"start\" x=\"12.93\" y=\"-409.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">titanic_data</text>\n<text text-anchor=\"start\" x=\"16.3\" y=\"-381.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- titanic_data&#45;&gt;target -->\n<g id=\"edge11\" class=\"edge\">\n<title>titanic_data&#45;&gt;target</title>\n<path fill=\"none\" stroke=\"black\" d=\"M94.65,-433.09C96.12,-434.93 97.49,-436.84 98.73,-438.8 129.25,-487.48 93.16,-518.89 127.73,-564.8 136.59,-576.57 149.87,-585.42 162.74,-591.82\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"160.88,-595.27 171.43,-596.25 163.79,-588.91 160.88,-595.27\"/>\n</g>\n<!-- age -->\n<g id=\"node9\" class=\"node\">\n<title>age</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M220.58,-473.6C220.58,-473.6 184.73,-473.6 
184.73,-473.6 178.73,-473.6 172.73,-467.6 172.73,-461.6 172.73,-461.6 172.73,-422 172.73,-422 172.73,-416 178.73,-410 184.73,-410 184.73,-410 220.58,-410 220.58,-410 226.58,-410 232.58,-416 232.58,-422 232.58,-422 232.58,-461.6 232.58,-461.6 232.58,-467.6 226.58,-473.6 220.58,-473.6\"/>\n<text text-anchor=\"start\" x=\"191.03\" y=\"-450.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">age</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-422.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;age -->\n<g id=\"edge15\" class=\"edge\">\n<title>titanic_data&#45;&gt;age</title>\n<path fill=\"none\" stroke=\"black\" d=\"M98.96,-413.77C119.17,-419.29 142.48,-425.65 161.78,-430.92\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"160.76,-434.54 171.33,-433.8 162.6,-427.79 160.76,-434.54\"/>\n</g>\n<!-- pclass -->\n<g id=\"node14\" class=\"node\">\n<title>pclass</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M222.45,-391.6C222.45,-391.6 182.85,-391.6 182.85,-391.6 176.85,-391.6 170.85,-385.6 170.85,-379.6 170.85,-379.6 170.85,-340 170.85,-340 170.85,-334 176.85,-328 182.85,-328 182.85,-328 222.45,-328 222.45,-328 228.45,-328 234.45,-334 234.45,-340 234.45,-340 234.45,-379.6 234.45,-379.6 234.45,-385.6 228.45,-391.6 222.45,-391.6\"/>\n<text text-anchor=\"start\" x=\"181.65\" y=\"-368.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">pclass</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-340.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;pclass -->\n<g id=\"edge19\" class=\"edge\">\n<title>titanic_data&#45;&gt;pclass</title>\n<path fill=\"none\" stroke=\"black\" d=\"M98.96,-387.83C118.59,-382.47 141.15,-376.31 160.11,-371.14\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"160.8,-374.3 169.52,-368.3 158.95,-367.55 
160.8,-374.3\"/>\n</g>\n<!-- fare -->\n<g id=\"node15\" class=\"node\">\n<title>fare</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M220.58,-309.6C220.58,-309.6 184.73,-309.6 184.73,-309.6 178.73,-309.6 172.73,-303.6 172.73,-297.6 172.73,-297.6 172.73,-258 172.73,-258 172.73,-252 178.73,-246 184.73,-246 184.73,-246 220.58,-246 220.58,-246 226.58,-246 232.58,-252 232.58,-258 232.58,-258 232.58,-297.6 232.58,-297.6 232.58,-303.6 226.58,-309.6 220.58,-309.6\"/>\n<text text-anchor=\"start\" x=\"190.28\" y=\"-286.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fare</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-258.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- titanic_data&#45;&gt;fare -->\n<g id=\"edge20\" class=\"edge\">\n<title>titanic_data&#45;&gt;fare</title>\n<path fill=\"none\" stroke=\"black\" d=\"M77.24,-368.54C91.13,-352.41 109.19,-333.3 127.73,-318.8 138.38,-310.46 150.95,-302.81 162.67,-296.44\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"164.03,-299.15 171.26,-291.4 160.78,-292.95 164.03,-299.15\"/>\n</g>\n<!-- age&#45;&gt;training_set_v1 -->\n<g id=\"edge5\" class=\"edge\">\n<title>age&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"red\" d=\"M233.03,-426.05C246.69,-418.59 263.08,-409.44 277.58,-400.8 300.82,-386.94 325.99,-371.06 347.83,-356.98\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"349.48,-359.44 355.97,-351.07 345.68,-353.56 349.48,-359.44\"/>\n</g>\n<!-- X_train&#45;&gt;fit_random_forest -->\n<g id=\"edge13\" class=\"edge\">\n<title>X_train&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M842.88,-339.11C858.98,-331.55 877.71,-322.75 895.86,-314.23\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"897.01,-317.08 904.58,-309.66 894.04,-310.75 897.01,-317.08\"/>\n</g>\n<!-- y_train&#45;&gt;fit_random_forest -->\n<g id=\"edge14\" 
class=\"edge\">\n<title>y_train&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M830.85,-277.8C848.79,-277.8 872.01,-277.8 894.49,-277.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"894.44,-281.3 904.44,-277.8 894.44,-274.3 894.44,-281.3\"/>\n</g>\n<!-- family -->\n<g id=\"node12\" class=\"node\">\n<title>family</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M221.33,-63.6C221.33,-63.6 183.98,-63.6 183.98,-63.6 177.98,-63.6 171.98,-57.6 171.98,-51.6 171.98,-51.6 171.98,-12 171.98,-12 171.98,-6 177.98,0 183.98,0 183.98,0 221.33,0 221.33,0 227.33,0 233.33,-6 233.33,-12 233.33,-12 233.33,-51.6 233.33,-51.6 233.33,-57.6 227.33,-63.6 221.33,-63.6\"/>\n<text text-anchor=\"start\" x=\"182.78\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">family</text>\n<text text-anchor=\"start\" x=\"183.53\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- family&#45;&gt;training_set_v1 -->\n<g id=\"edge10\" class=\"edge\">\n<title>family&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M233.75,-41.15C249.31,-47.56 267.18,-57.68 277.58,-72.8 329.72,-148.62 250.17,-205.1 306.58,-277.8 314.35,-287.82 324.99,-295.4 336.44,-301.15\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.64,-304.63 345.19,-305.55 337.52,-298.24 334.64,-304.63\"/>\n</g>\n<!-- prefit_random_forest -->\n<g id=\"node13\" class=\"node\">\n<title>prefit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M864.88,-227.6C864.88,-227.6 730.78,-227.6 730.78,-227.6 724.78,-227.6 718.78,-221.6 718.78,-215.6 718.78,-215.6 718.78,-176 718.78,-176 718.78,-170 724.78,-164 730.78,-164 730.78,-164 864.88,-164 864.88,-164 870.88,-164 876.88,-170 876.88,-176 876.88,-176 876.88,-215.6 876.88,-215.6 876.88,-221.6 870.88,-227.6 864.88,-227.6\"/>\n<text text-anchor=\"start\" x=\"729.58\" y=\"-204.5\" font-family=\"Helvetica,sans-Serif\" 
font-weight=\"bold\" font-size=\"14.00\">prefit_random_forest</text>\n<text text-anchor=\"start\" x=\"753.95\" y=\"-176.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- prefit_random_forest&#45;&gt;fit_random_forest -->\n<g id=\"edge12\" class=\"edge\">\n<title>prefit_random_forest&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M867.58,-228.09C876.86,-232.45 886.43,-236.94 895.85,-241.37\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"894.05,-244.86 904.59,-245.94 897.03,-238.52 894.05,-244.86\"/>\n</g>\n<!-- pclass&#45;&gt;training_set_v1 -->\n<g id=\"edge4\" class=\"edge\">\n<title>pclass&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M234.8,-353.48C261.61,-348.05 301.29,-340 335.69,-333.03\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"336.06,-336.32 345.17,-330.9 334.67,-329.46 336.06,-336.32\"/>\n</g>\n<!-- fare&#45;&gt;training_set_v1 -->\n<g id=\"edge6\" class=\"edge\">\n<title>fare&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M233.06,-283.76C259.86,-289.2 300.51,-297.44 335.66,-304.56\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"334.87,-308.18 345.37,-306.74 336.26,-301.32 334.87,-308.18\"/>\n</g>\n<!-- _train_test_split_func_inputs -->\n<g id=\"node16\" class=\"node\">\n<title>_train_test_split_func_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"507.18,-268.6 306.58,-268.6 306.58,-203 507.18,-203 507.18,-268.6\"/>\n<text text-anchor=\"start\" x=\"349.75\" y=\"-240.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"472.75\" y=\"-240.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n<text text-anchor=\"start\" x=\"321.25\" y=\"-219.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">validation_size_fraction</text>\n<text text-anchor=\"start\" 
x=\"467.13\" y=\"-219.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">float</text>\n</g>\n<!-- _train_test_split_func_inputs&#45;&gt;train_test_split_func -->\n<g id=\"edge3\" class=\"edge\">\n<title>_train_test_split_func_inputs&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"black\" d=\"M489.71,-269.08C501.58,-273.91 513.83,-278.89 525.79,-283.75\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"524.16,-287.27 534.74,-287.8 526.8,-280.79 524.16,-287.27\"/>\n</g>\n<!-- _prefit_random_forest_inputs -->\n<g id=\"node17\" class=\"node\">\n<title>_prefit_random_forest_inputs</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"679.53,-218.1 546.43,-218.1 546.43,-173.5 679.53,-173.5 679.53,-218.1\"/>\n<text text-anchor=\"start\" x=\"561.23\" y=\"-190\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">random_state</text>\n<text text-anchor=\"start\" x=\"650.48\" y=\"-190\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">int</text>\n</g>\n<!-- _prefit_random_forest_inputs&#45;&gt;prefit_random_forest -->\n<g id=\"edge18\" class=\"edge\">\n<title>_prefit_random_forest_inputs&#45;&gt;prefit_random_forest</title>\n<path fill=\"none\" stroke=\"black\" d=\"M679.99,-195.8C688.96,-195.8 698.28,-195.8 707.58,-195.8\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"707.45,-199.3 717.45,-195.8 707.45,-192.3 707.45,-199.3\"/>\n</g>\n<!-- input -->\n<g id=\"node18\" class=\"node\">\n<title>input</title>\n<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"77.43,-542.1 23.43,-542.1 23.43,-505.5 77.43,-505.5 77.43,-542.1\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-518\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">input</text>\n</g>\n<!-- function -->\n<g id=\"node19\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M72.85,-487.1C72.85,-487.1 28,-487.1 28,-487.1 22,-487.1 16,-481.1 16,-475.1 16,-475.1 16,-462.5 
16,-462.5 16,-456.5 22,-450.5 28,-450.5 28,-450.5 72.85,-450.5 72.85,-450.5 78.85,-450.5 84.85,-456.5 84.85,-462.5 84.85,-462.5 84.85,-475.1 84.85,-475.1 84.85,-481.1 78.85,-487.1 72.85,-487.1\"/>\n<text text-anchor=\"middle\" x=\"50.43\" y=\"-463\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": "<graphviz.graphs.Digraph at 0x13f6092e0>"
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Visualize a particular path:\n",
"dr.visualize_path_between(\"age\", \"fit_random_forest\") # pass in a path if you want to save the image.\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:52:35.938980Z",
"start_time": "2023-11-07T23:52:35.127130Z"
}
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 8.0.5 (20230430.1635)\n -->\n<!-- Pages: 1 -->\n<svg width=\"710pt\" height=\"206pt\"\n viewBox=\"0.00 0.00 710.00 205.80\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 201.8)\">\n<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-201.8 706,-201.8 706,4 -4,4\"/>\n<g id=\"clust1\" class=\"cluster\">\n<title>cluster__legend</title>\n<polygon fill=\"none\" stroke=\"black\" points=\"8,-114.8 8,-189.8 92.85,-189.8 92.85,-114.8 8,-114.8\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-172.5\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">Legend</text>\n</g>\n<!-- train_test_split_func -->\n<g id=\"node1\" class=\"node\">\n<title>train_test_split_func</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M405.05,-104.6C405.05,-104.6 275.45,-104.6 275.45,-104.6 269.45,-104.6 263.45,-98.6 263.45,-92.6 263.45,-92.6 263.45,-53 263.45,-53 263.45,-47 269.45,-41 275.45,-41 275.45,-41 405.05,-41 405.05,-41 411.05,-41 417.05,-47 417.05,-53 417.05,-53 417.05,-92.6 417.05,-92.6 417.05,-98.6 411.05,-104.6 405.05,-104.6\"/>\n<text text-anchor=\"start\" x=\"274.25\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">train_test_split_func</text>\n<text text-anchor=\"start\" x=\"329.75\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">dict</text>\n</g>\n<!-- X_train -->\n<g id=\"node5\" class=\"node\">\n<title>X_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M523.9,-145.6C523.9,-145.6 458.05,-145.6 458.05,-145.6 452.05,-145.6 446.05,-139.6 446.05,-133.6 446.05,-133.6 446.05,-94 446.05,-94 446.05,-88 452.05,-82 458.05,-82 458.05,-82 523.9,-82 
523.9,-82 529.9,-82 535.9,-88 535.9,-94 535.9,-94 535.9,-133.6 535.9,-133.6 535.9,-139.6 529.9,-145.6 523.9,-145.6\"/>\n<text text-anchor=\"start\" x=\"468.1\" y=\"-122.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">X_train</text>\n<text text-anchor=\"start\" x=\"456.85\" y=\"-94.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- train_test_split_func&#45;&gt;X_train -->\n<g id=\"edge5\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;X_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M417.36,-93.78C423.43,-95.46 429.49,-97.13 435.37,-98.75\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"434.16,-102.32 444.73,-101.6 436.02,-95.57 434.16,-102.32\"/>\n</g>\n<!-- y_train -->\n<g id=\"node6\" class=\"node\">\n<title>y_train</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M511.9,-63.6C511.9,-63.6 470.05,-63.6 470.05,-63.6 464.05,-63.6 458.05,-57.6 458.05,-51.6 458.05,-51.6 458.05,-12 458.05,-12 458.05,-6 464.05,0 470.05,0 470.05,0 511.9,0 511.9,0 517.9,0 523.9,-6 523.9,-12 523.9,-12 523.9,-51.6 523.9,-51.6 523.9,-57.6 517.9,-63.6 511.9,-63.6\"/>\n<text text-anchor=\"start\" x=\"468.85\" y=\"-40.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">y_train</text>\n<text text-anchor=\"start\" x=\"471.85\" y=\"-12.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- train_test_split_func&#45;&gt;y_train -->\n<g id=\"edge6\" class=\"edge\">\n<title>train_test_split_func&#45;&gt;y_train</title>\n<path fill=\"none\" stroke=\"red\" d=\"M417.36,-51.82C427.74,-48.96 438.08,-46.11 447.56,-43.49\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"448.15,-46.68 456.86,-40.65 446.29,-39.94 448.15,-46.68\"/>\n</g>\n<!-- training_set_v1 -->\n<g id=\"node2\" class=\"node\">\n<title>training_set_v1</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M222.45,-104.6C222.45,-104.6 125.85,-104.6 
125.85,-104.6 119.85,-104.6 113.85,-98.6 113.85,-92.6 113.85,-92.6 113.85,-53 113.85,-53 113.85,-47 119.85,-41 125.85,-41 125.85,-41 222.45,-41 222.45,-41 228.45,-41 234.45,-47 234.45,-53 234.45,-53 234.45,-92.6 234.45,-92.6 234.45,-98.6 228.45,-104.6 222.45,-104.6\"/>\n<text text-anchor=\"start\" x=\"124.65\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">training_set_v1</text>\n<text text-anchor=\"start\" x=\"140.02\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">DataFrame</text>\n</g>\n<!-- training_set_v1&#45;&gt;train_test_split_func -->\n<g id=\"edge1\" class=\"edge\">\n<title>training_set_v1&#45;&gt;train_test_split_func</title>\n<path fill=\"none\" stroke=\"red\" d=\"M234.86,-72.8C240.59,-72.8 246.47,-72.8 252.39,-72.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"252.05,-76.3 262.05,-72.8 252.05,-69.3 252.05,-76.3\"/>\n</g>\n<!-- fit_random_forest -->\n<g id=\"node3\" class=\"node\">\n<title>fit_random_forest</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M690,-104.6C690,-104.6 576.9,-104.6 576.9,-104.6 570.9,-104.6 564.9,-98.6 564.9,-92.6 564.9,-92.6 564.9,-53 564.9,-53 564.9,-47 570.9,-41 576.9,-41 576.9,-41 690,-41 690,-41 696,-41 702,-47 702,-53 702,-53 702,-92.6 702,-92.6 702,-98.6 696,-104.6 690,-104.6\"/>\n<text text-anchor=\"start\" x=\"575.7\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">fit_random_forest</text>\n<text text-anchor=\"start\" x=\"589.58\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">ClassifierMixin</text>\n</g>\n<!-- age -->\n<g id=\"node4\" class=\"node\">\n<title>age</title>\n<path fill=\"#b4d8e4\" stroke=\"red\" d=\"M68.35,-104.6C68.35,-104.6 32.5,-104.6 32.5,-104.6 26.5,-104.6 20.5,-98.6 20.5,-92.6 20.5,-92.6 20.5,-53 20.5,-53 20.5,-47 26.5,-41 32.5,-41 32.5,-41 68.35,-41 68.35,-41 74.35,-41 80.35,-47 80.35,-53 80.35,-53 80.35,-92.6 80.35,-92.6 
80.35,-98.6 74.35,-104.6 68.35,-104.6\"/>\n<text text-anchor=\"start\" x=\"38.8\" y=\"-81.5\" font-family=\"Helvetica,sans-Serif\" font-weight=\"bold\" font-size=\"14.00\">age</text>\n<text text-anchor=\"start\" x=\"31.3\" y=\"-53.5\" font-family=\"Helvetica,sans-Serif\" font-style=\"italic\" font-size=\"14.00\">Series</text>\n</g>\n<!-- age&#45;&gt;training_set_v1 -->\n<g id=\"edge2\" class=\"edge\">\n<title>age&#45;&gt;training_set_v1</title>\n<path fill=\"none\" stroke=\"red\" d=\"M80.58,-72.8C87.45,-72.8 95.06,-72.8 102.89,-72.8\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"102.64,-76.3 112.64,-72.8 102.64,-69.3 102.64,-76.3\"/>\n</g>\n<!-- X_train&#45;&gt;fit_random_forest -->\n<g id=\"edge3\" class=\"edge\">\n<title>X_train&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M536.04,-100.94C541.86,-99.24 547.97,-97.46 554.16,-95.65\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"555,-98.76 563.62,-92.6 553.04,-92.04 555,-98.76\"/>\n</g>\n<!-- y_train&#45;&gt;fit_random_forest -->\n<g id=\"edge4\" class=\"edge\">\n<title>y_train&#45;&gt;fit_random_forest</title>\n<path fill=\"none\" stroke=\"red\" d=\"M524.2,-41.21C533.25,-43.85 543.51,-46.84 554.02,-49.91\"/>\n<polygon fill=\"red\" stroke=\"red\" points=\"552.99,-53.55 563.57,-52.99 554.95,-46.83 552.99,-53.55\"/>\n</g>\n<!-- function -->\n<g id=\"node7\" class=\"node\">\n<title>function</title>\n<path fill=\"#b4d8e4\" stroke=\"black\" d=\"M72.85,-159.1C72.85,-159.1 28,-159.1 28,-159.1 22,-159.1 16,-153.1 16,-147.1 16,-147.1 16,-134.5 16,-134.5 16,-128.5 22,-122.5 28,-122.5 28,-122.5 72.85,-122.5 72.85,-122.5 78.85,-122.5 84.85,-128.5 84.85,-134.5 84.85,-134.5 84.85,-147.1 84.85,-147.1 84.85,-153.1 78.85,-159.1 72.85,-159.1\"/>\n<text text-anchor=\"middle\" x=\"50.42\" y=\"-135\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">function</text>\n</g>\n</g>\n</svg>\n",
"text/plain": "<graphviz.graphs.Digraph at 0x13f0667f0>"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# If we want to just focus on the path we can do:\n",
"dr.visualize_path_between(\"age\", \"fit_random_forest\", strict_path_visualization=True) # pass in values if you want to save the image.\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:22.866435Z",
"start_time": "2023-11-07T23:50:22.394442Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": "[Variable(name='train_test_split_func', type=typing.Dict[str, typing.Union[pandas.core.frame.DataFrame, pandas.core.series.Series]], tags={'module': 'model_pipeline'}, is_external_input=False, originating_functions=(<function train_test_split_func at 0x13f043040>,)),\n Variable(name='training_set_v1', type=<class 'pandas.core.frame.DataFrame'>, tags={'module': 'sets', 'owner': 'data-science', 'importance': 'production', 'artifact': 'training_set'}, is_external_input=False, originating_functions=(<function training_set_v1 at 0x13f043280>,)),\n Variable(name='fit_random_forest', type=<class 'sklearn.base.ClassifierMixin'>, tags={'module': 'model_pipeline', 'owner': 'data-science', 'importance': 'production', 'artifact': 'model'}, is_external_input=False, originating_functions=(<function fit_random_forest at 0x13f043160>,)),\n Variable(name='age', type=<class 'pandas.core.series.Series'>, tags={'module': 'data_loading', 'PII': 'true'}, is_external_input=False, originating_functions=(<function titanic_data at 0x12073f5e0>,)),\n Variable(name='X_train', type=<class 'pandas.core.frame.DataFrame'>, tags={'module': 'model_pipeline'}, is_external_input=False, originating_functions=(<function train_test_split_func at 0x13f043040>,)),\n Variable(name='y_train', type=<class 'pandas.core.series.Series'>, tags={'module': 'model_pipeline'}, is_external_input=False, originating_functions=(<function train_test_split_func at 0x13f043040>,))]"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# to programmatically get the nodes in the path:\n",
"nodes_in_path = dr.what_is_the_path_between(\"age\", \"fit_random_forest\")\n",
"nodes_in_path"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T23:50:23.082151Z",
"start_time": "2023-11-07T23:50:23.009952Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T02:19:27.919791Z",
"start_time": "2023-11-07T02:19:27.882617Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T02:19:27.934486Z",
"start_time": "2023-11-07T02:19:27.890850Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-07T02:19:27.934943Z",
"start_time": "2023-11-07T02:19:27.898178Z"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}