blob: 0322611f5ad583b5167532fd52e083aff85c8bff [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Documentation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Sample"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import marvin_titanic_engine\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/vagrant/projects/titanic-engine/notebooks\n"
]
}
],
"source": [
"print(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#from marvin_python_toolbox.common.data import MarvinData\n",
"import pandas as pd\n",
"\n",
"# Load the raw Titanic CSV (the path is relative to the kernel's working directory).\n",
"# NOTE(review): the 'test' frame is read from train.csv as well, so it is an independent copy of\n",
"# the training data rather than a held-out set -- confirm whether a separate test file was intended.\n",
"train_df = pd.read_csv('marvin_titanic_engine/data_files/train.csv')\n",
"test_df = pd.read_csv('marvin_titanic_engine/data_files/train.csv')\n",
"\n",
"# Initial dataset consumed by the preparation step, keyed by split name.\n",
"marvin_initial_dataset = dict(train=train_df, test=test_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training Preparator"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Length: 1046\n"
]
}
],
"source": [
"from sklearn.model_selection import StratifiedShuffleSplit, train_test_split, cross_val_score, GridSearchCV\n",
"\n",
"pred_cols = ['Age', 'Pclass', 'Sex']\n",
"dep_var = 'Survived'\n",
"\n",
"# Keep only the modelling columns and drop every row with a missing value.\n",
"train_no_na = marvin_initial_dataset['train'][\n",
"    pred_cols + [dep_var]\n",
"].dropna()\n",
"print(\"Length: {}\".format(len(train_no_na)))\n",
"\n",
"# Feature Engineering\n",
"# Build the encoded frame with assign(), which returns a new DataFrame, instead of writing into\n",
"# a slice of train_no_na (that pattern triggers pandas' SettingWithCopyWarning).\n",
"data_X = train_no_na[pred_cols].assign(\n",
"    Sex=train_no_na['Sex'].map({'male': 1, 'female': 0})\n",
")\n",
"data_y = train_no_na[dep_var]\n",
"\n",
"# Stratified Shuffle Split: 60% of the rows are held out for testing and random_state is fixed\n",
"# so the split is reproducible. Every iteration overwrites the previous split, so the sets stored\n",
"# below are the ones produced by the last of the 5 splits.\n",
"sss = StratifiedShuffleSplit(n_splits=5, test_size=.6, random_state=0)\n",
"for train_index, test_index in sss.split(data_X, data_y):\n",
"    X_train, X_test = data_X.iloc[train_index], data_X.iloc[test_index]\n",
"    y_train, y_test = data_y.iloc[train_index], data_y.iloc[test_index]\n",
"\n",
"# Prepared dataset consumed by the trainer and evaluation cells.\n",
"marvin_dataset = {\n",
"    'X_train': X_train,\n",
"    'y_train': y_train,\n",
"    'X_test': X_test,\n",
"    'y_test': y_test,\n",
"    'sss': sss\n",
"}\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Trainer"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Best score for training data:', 0.76315789473684215)\n",
"('Best `C`:', 100)\n",
"('Best kernel:', 'rbf')\n",
"('Best `gamma`:', 0.001)\n"
]
}
],
"source": [
"from sklearn import svm, neighbors, tree\n",
"from sklearn.model_selection import StratifiedShuffleSplit, train_test_split, cross_val_score, GridSearchCV\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.preprocessing import StandardScaler, scale\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"\n",
"# Candidate hyper-parameters for the SVM. gamma is searched only for the 'rbf' kernel: SVC ignores\n",
"# it for 'linear', so listing it there would just refit identical models.\n",
"svm_param_grid = [\n",
"    {'C': [1, 10, 100], 'kernel': ['linear']},\n",
"    {'C': [1, 10, 100], 'gamma': [0.01, 0.001], 'kernel': ['rbf']},\n",
"]\n",
"\n",
"# Cross-validated grid search over the SVM candidates, fitted on the training split\n",
"svm_grid = GridSearchCV(estimator=svm.SVC(), param_grid=svm_param_grid, n_jobs=-1)\n",
"svm_grid.fit(\n",
"    marvin_dataset['X_train'],\n",
"    marvin_dataset['y_train']\n",
")\n",
"\n",
"# Full grid over the random forest parameters; random_state is pinned so the search is reproducible.\n",
"rf_param_grid = {\n",
"    \"max_depth\": [3, None],\n",
"    \"random_state\": [0],\n",
"    \"min_samples_split\": [2, 3, 10],\n",
"    \"min_samples_leaf\": [1, 3, 10],\n",
"    \"n_estimators\": [20, 50],\n",
"    \"bootstrap\": [True, False],\n",
"    \"criterion\": [\"gini\", \"entropy\"]\n",
"}\n",
"\n",
"# Run the grid search for the forest (n_jobs=-1 to match the SVM search above)\n",
"rf_grid = GridSearchCV(estimator=RandomForestClassifier(), param_grid=rf_param_grid, n_jobs=-1)\n",
"rf_grid.fit(\n",
"    marvin_dataset['X_train'],\n",
"    marvin_dataset['y_train']\n",
")\n",
"\n",
"# Fitted searches keyed by model type; the evaluation and predictor cells read from this dict.\n",
"marvin_model = {}\n",
"marvin_model['svm'] = svm_grid\n",
"marvin_model['rf'] = rf_grid\n"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model Type: rf\n",
"{'warm_start': False, 'oob_score': False, 'n_jobs': 1, 'verbose': 0, 'max_leaf_nodes': None, 'bootstrap': True, 'min_samples_leaf': 1, 'n_estimators': 20, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'criterion': 'entropy', 'random_state': 0, 'min_impurity_split': 1e-07, 'max_features': 'auto', 'max_depth': 3, 'class_weight': None}\n",
"Accuracy Score: 0.7703%\n",
"Classification Report:\n",
"\n",
" precision recall f1-score support\n",
"\n",
" 0.0 0.89 0.81 0.85 409\n",
" 1.0 0.70 0.82 0.75 219\n",
"\n",
"avg / total 0.83 0.81 0.82 628\n",
"\n",
"Confusion Matrix:\n",
"\n",
"[[332 77]\n",
" [ 40 179]]\n",
"\n",
"\n",
"\n",
"Model Type: svm\n",
"{'kernel': 'rbf', 'C': 100, 'verbose': False, 'probability': False, 'degree': 3, 'shrinking': True, 'max_iter': -1, 'decision_function_shape': None, 'random_state': None, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 0.001, 'class_weight': None}\n",
"Accuracy Score: 0.7632%\n",
"Classification Report:\n",
"\n",
" precision recall f1-score support\n",
"\n",
" 0.0 0.83 0.84 0.83 371\n",
" 1.0 0.76 0.76 0.76 257\n",
"\n",
"avg / total 0.80 0.80 0.80 628\n",
"\n",
"Confusion Matrix:\n",
"\n",
"[[310 61]\n",
" [ 62 195]]\n",
"\n",
"\n",
"\n"
]
}
],
"source": [
"from sklearn import metrics\n",
"# .items() works on both Python 2 and Python 3 (iteritems() exists only on Python 2).\n",
"for model_type, fitted_model in marvin_model.items():\n",
"    print(\"Model Type: {0}\\n{1}\".format(model_type, fitted_model.best_estimator_.get_params()))\n",
"    # best_score_ is the grid search's best cross-validated score, a fraction in [0, 1];\n",
"    # format it as a real percentage instead of appending '%' to the raw fraction.\n",
"    print(\"Best CV Accuracy Score: {:.2%}\".format(fitted_model.best_score_))\n",
"\n",
"    # Predict once on the held-out split and reuse the result for both reports.\n",
"    y_true = marvin_dataset['y_test']\n",
"    y_pred = fitted_model.predict(marvin_dataset['X_test'])\n",
"\n",
"    # scikit-learn metrics expect (y_true, y_pred); the reversed order swaps precision and\n",
"    # recall and transposes the confusion matrix.\n",
"    print(\"Classification Report:\\n\")\n",
"    print(metrics.classification_report(y_true, y_pred))\n",
"\n",
"    # Rows are true classes, columns are predicted classes.\n",
"    print(\"Confusion Matrix:\\n\")\n",
"    print(metrics.confusion_matrix(y_true, y_pred))\n",
"    print(\"\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature ranking:\n",
"1. feature sex (0.486342)\n",
"2. feature pclass (0.265882)\n",
"3. feature age (0.247776)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAFUCAYAAADmhXKJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAF0NJREFUeJzt3Xu0pXdd3/HPl8mFW4RCpkAuJhFT2hHDbQjUIlLBkoAkqKCJRYWFjazVFKhWSFFTGkUBqVRtQIKAyMVwa2GQodFqsVULZsAIJCFlCOAkKAyQIPeQ5Ns/9jN0M57J7JDfzN7nzOu11lk5z+Xs/T0ne82853me8+zq7gAAcNvdbtkDAABsFMIKAGAQYQUAMIiwAgAYRFgBAAwirAAABhFWwEqrqt+qql9Y9hwAiyj3sYKNqao+luQeSW6aW/2PuvsTt+ExH5Hktd193G2bbn2qqt9Jck13//yyZwFWkyNWsLE9rrvvPPfxTUfVCFV12DKf/7aoqk3LngFYfcIKDkFV9dCq+vOqur6q/mo6ErVn21Oq6sqq+nxVXV1VPzWtv1OSdyY5pqq+MH0cU1W/U1W/NPf1j6iqa+aWP1ZVz66q9yf5YlUdNn3dW6pqd1V9tKqefguzfv3x9zx2VT2rqj5VVX9TVY+vqsdU1f+tqs9W1XPmvva5VfXmqnrD9P28r6ruN7f9n1TVu6afw+VVdcZez/vSqtpeVV9M8tQk/zLJs6bv/e3TfudV1Uemx7+iqn5g7jGeXFV/WlUvqqrrpu/19Lntd6uqV1XVJ6btb53b9v1Vddk0259X1Slz255dVddOz3lVVT1ygf/twEEgrOAQU1XHJnlHkl9Kcrck/y7JW6pq87TLp5J8f5JvSfKUJC+uqgd29xeTnJ7kE9/EEbCzkzw2yV2T3Jzk7Un+KsmxSR6Z5JlV9egFH+ueSW4/fe35SV6e5ElJHpTku5P8QlWdNLf/mUneNH2vr0/y1qo6vKoOn+b4gyT/MMm/SfK6qrrP3Nf+aJLnJTkqye8meV2SF07f++OmfT4yPe9dkvzHJK+tqnvNPcZDklyV5OgkL0zyiqqqadtrktwxyXdMM7w4SarqAUlemeSnktw9ycuSbKuqI6f5zk3y4O4+Ksmjk3xswZ8dcIAJK9jY3jod8bh+7mjIk5Js7+7t3X1zd/9hkh1JHpMk3f2O7v5Iz/xJZuHx3bdxjt/o7l3d/eUkD06yubsv6O4buvvqzOLorAUf62tJntfdX0tycWbB8uvd/fnuvjzJFUnuN7f/e7v7zdP+v5ZZlD10+rhzkudPc/xxkt/PLAL3eFt3/9n0c/rKWsN095u6+xPTPm9I8uEkp87t8vHufnl335Tk1UnuleQeU3ydnuRp3X1dd39t+nknyTlJXtbd7+num7r71Um+Os18U5Ijk2ypqsO7+2Pd/ZEFf3bAASasYGN7fHffdfp4/LTuhCRPnAuu65M8LLO/8FNVp1fVu6fTatdnFlxH38Y5ds19fkJmpxPnn/85mV1ov4jPTJGSJF+e/vvJue1fziyY/t5zd/fNSa5Jcsz0sWtat8fHMzsSttbca6qqH587ZXd9kvvmG39efzv3/F+aPr1zkuOTfLa7r1vjYU9I8jN7/YyOT3JMd+9M8swkz03yqaq6uKqO2d+cwMEhrODQsyvJa+aC667dfafufn5VHZnkLUlelOQe3X3XJNuT7Dl1tdavEX8xs9NZe9xzjX3mv25Xko/u9fxHdfdjbvN3trbj93xSVbdLclyST0wfx0/r9vjWJNfuY+6/t1xVJ2R2tO3cJHeffl4fzP//ed2SXUnuVlV33ce25+31M7pjd/9eknT367v7YZkFWCd5wQLPBxwEwgoOPa9N8riqenRVbaqq208XhR+X5IjMTjPtTnLjdKH1v5j72k8muXtV3WVu3WVJHjNdiH3PzI6m3JK/SPL56QLsO0wz3LeqHjzsO/xGD6qqH6zZbyQ+M7NTau9O8p4kX8rsYvTDa3YB/+MyO724L59M8m1zy3fKLGx2J7ML/zM7YrVf3f03mf0ywEuq6h9MMz
x82vzyJE+rqofUzJ2q6rFVdVRV3aeqvneK4K9kdoTu5n08DXCQCSs4xHT3rswu6H5OZkGwK8nPJrldd38+ydOTvDHJdZldvL1t7ms/lOT3klw9naI6JrMLsP8qswuo/yDJG/bz/DdldnH8/ZN8NMmnk/x2Zhd/HwhvS/IjmX0/P5bkB6frmW7ILKROn2Z4SZIfn77HfXlFZtc2XV9Vb+3uK5L8pyT/J7Po+s4kf3YrZvuxzK4Z+1BmvzTwzCTp7h1J/lWS/zLNvTPJk6evOTLJ86eZ/zazi97//a14TuAAcoNQYMOqqucm+fbuftKyZwEODY5YAQAMIqwAAAZxKhAAYBBHrAAABhFWAACDLO2d5o8++ug+8cQTl/X0AAALe+973/vp7t68v/2WFlYnnnhiduzYsaynBwBYWFV9fJH9FjoVWFWnVdVVVbWzqs5bY/uTq2r39H5Zl1XVT97agQEA1rv9HrGqqk1JLkzyfZm9eemlVbVtuuPwvDd097kHYEYAgHVhkSNWpybZ2d1XT28BcXFmb4cBAMCcRcLq2MzeS2yPa6Z1e/uhqnp/Vb25qo5fY3uq6pyq2lFVO3bv3v1NjAsAsLpG3W7h7UlO7O5TkvxhklevtVN3X9TdW7t76+bN+72wHgBgXVkkrK5NMn8E6rhp3dd192e6+6vT4m8nedCY8QAA1o9FwurSJCdX1UlVdUSSs5Jsm9+hqu41t3hGkivHjQgAsD7s97cCu/vGqjo3ySVJNiV5ZXdfXlUXJNnR3duSPL2qzkhyY5LPJnnyAZwZAGAlLe1NmLdu3dpuEAoArAdV9d7u3rq//bxXIADAIMIKAGCQpb1XIINVLXsCvllLOh0PwHiOWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADLJQWFXVaVV1VVXtrKrzbmG/H6qqrqqt40YEAFgf9htWVbUpyYVJTk+yJcnZVbVljf2OSvKMJO8ZPSQAwHqwyBGrU5Ps7O6ru/uGJBcnOXON/X4xyQuSfGXgfAAA68YiYXVskl1zy9dM676uqh6Y5PjufsfA2QAA1pXbfPF6Vd0uya8l+ZkF9j2nqnZU1Y7du3ff1qcGAFgpi4TVtUmOn1s+blq3x1FJ7pvkXVX1sSQPTbJtrQvYu/ui7t7a3Vs3b978zU8NALCCFgmrS5OcXFUnVdURSc5Ksm3Pxu7+XHcf3d0ndveJSd6d5Izu3nFAJgYAWFH7DavuvjHJuUkuSXJlkjd29+VVdUFVnXGgBwQAWC8OW2Sn7t6eZPte687fx76PuO1jAQCsP+68DgAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGWSisquq0qrqqqnZW1XlrbH9aVX2gqi6rqj+tqi3jRwUAWG37Dauq2pTkwiSnJ9mS5Ow1wun13f2d3X3/JC9M8mvDJwUAWHGLHLE6NcnO7r66u29IcnGSM+d36O6/m1u8U5IeNyIAwPpw2AL7HJtk19zyNUkesvdOVfWvk/x0kiOSfO+Q6QAA1pFhF6
9394Xdfe8kz07y82vtU1XnVNWOqtqxe/fuUU8NALASFgmra5McP7d83LRuXy5O8vi1NnT3Rd29tbu3bt68efEpAQDWgUXC6tIkJ1fVSVV1RJKzkmyb36GqTp5bfGySD48bEQBgfdjvNVbdfWNVnZvkkiSbkryyuy+vqguS7OjubUnOrapHJflakuuS/MSBHBoAYBUtcvF6unt7ku17rTt/7vNnDJ4LAGDdced1AIBBhBUAwCDCCgBgEGEFADCIsAIAGERYAQAMIqwAAAYRVgAAgwgrAIBBhBUAwCDCCgBgEGEFADCIsAIAGOSwZQ8AHGRVy56A26J72RMAt8ARKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQdx5HYB9c6f+9ctd+pfCESsAgEGEFQDAIMIKAGAQYQUAMIiwAgAYRFgBAAwirAAABhFWAACDCCsAgEGEFQDAIMIKAGAQYQUAMIiwAgAYRFgBAAwirAAABhFWAACDCCsAgEGEFQDAIMIKAGAQYQUAMIiwAgAYZKGwqqrTquqqqtpZVeetsf2nq+qKqnp/Vf1RVZ0wflQAgNW237Cqqk1JLkxyepItSc6uqi177faXSbZ29ylJ3pzkhaMHBQBYdYscsTo1yc7uvrq7b0hycZIz53fo7v/Z3V+aFt+d5LixYwIArL5FwurYJLvmlq+Z1u3LU5O8c60NVXVOVe2oqh27d+9efEoAgHVg6MXrVfWkJFuT/Opa27v7ou7e2t1bN2/ePPKpAQCW7rAF9rk2yfFzy8dN675BVT0qyc8l+Z7u/uqY8QAA1o9FjlhdmuTkqjqpqo5IclaSbfM7VNUDkrwsyRnd/anxYwIArL79hlV335jk3CSXJLkyyRu7+/KquqCqzph2+9Ukd07ypqq6rKq27ePhAAA2rEVOBaa7tyfZvte68+c+f9TguQAA1h13XgcAGERYAQAMIqwAAAYRVgAAgwgrAIBBhBUAwCDCCgBgEGEFADCIsAIAGERYAQAMIqwAAAYRVgAAgwgrAIBBhBUAwCDCCgBgEGEFADCIsAIAGERYAQAMIqwAAAYRVgAAgwgrAIBBhBUAwCDCCgBgEGEFADCIsAIAGERYAQAMIqwAAAYRVgAAgwgrAIBBhBUAwCDCCgBgEGEFADCIsAIAGERYAQAMIqwAAAYRVgAAgwgrAIBBhBUAwCDCCgBgEGEFADCIsAIAGERYAQAMIqwAAAYRVgAAgywUVlV1WlVdVVU7q+q8NbY/vKreV1U3VtUTxo8JALD69htWVbUpyYVJTk+yJcnZVbVlr93+OsmTk7x+9IAAAOvFYQvsc2qSnd19dZJU1cVJzkxyxZ4duvtj07abD8CMAADrwiKnAo9Nsmtu+Zpp3a1WVedU1Y6q2rF79+5v5iEAAFbWQb14vbsv6u6t3b118+bNB/OpAQAOuEXC6tokx88tHzetAwBgziJhdWmSk6vqpKo6IslZSbYd2LEAANaf/YZVd9+Y5NwklyS5Mskbu/vyqrqgqs5Ikqp6cFVdk+SJSV5WVZcfyKEBAFbRIr8VmO7enmT7XuvOn/v80sxOEQIAHLLceR0AYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADCKsAAAGEVYAAIMIKwCAQYQVAMAgwgoAYBBhBQAwiLACABhEWAEADLJQWFXVaVV1VVXtrKrz1th+ZFW9Ydr+nqo6cfSgAACrbr9hVVWbklyY5PQkW5KcXVVb9trtqUmu6+5vT/LiJC8YPSgAwKpb5IjVqUl2dvfV3X1DkouTnL
nXPmcmefX0+ZuTPLKqatyYAACrb5GwOjbJrrnla6Z1a+7T3Tcm+VySu48YEABgvTjsYD5ZVZ2T5Jxp8QtVddXBfH7WtaOTfHrZQxwQDu6OtnFfK4nXy3gb9/XitTLaCYvstEhYXZvk+Lnl46Z1a+1zTVUdluQuST6z9wN190VJLlpkMJhXVTu6e+uy52D1ea1wa3i9MNoipwIvTXJyVZ1UVUckOSvJtr322ZbkJ6bPn5Dkj7u7x40JALD69nvEqrtvrKpzk1ySZFOSV3b35VV1QZId3b0tySuSvKaqdib5bGbxBQBwSCkHllgPquqc6VQy3CKvFW4NrxdGE1YAAIN4SxsAgEGEFQDAIMIKAGAQYcVKqqpfnO6Jtmf5W6rqVcucidVVVU+sqqOmz3++qv5rVT1w2XOxuqrqjsuegY1JWLGqDkvynqo6paq+L7P7qb13yTOxun6huz9fVQ9L8qjMbgHz0iXPxAqqqu+qqiuSfGhavl9VvWTJY7GB+K1AVlZVPTLJ7ye5LsnDu3vnkkdiRVXVX3b3A6rqV5J8oLtfv2fdsmdjtVTVezK7kfW2Pa+Pqvpgd993uZOxUThixUqqqocn+Y0kFyR5V5LfrKpjljoUq+zaqnpZkh9Jsr2qjow/39iH7t6116qbljIIG9JBfRNmuBVelOSJ3X1FklTVDyb54yT/eKlTsap+OMlpSV7U3ddX1b2S/OySZ2I17aqq70rSVXV4kmckuXLJM7GBOBXISqqqTd19017r7t7df+/NvaGq7p3kmu7+alU9IskpSX63u69f7mSsmqo6OsmvZ3YtXiX5gyTP8GcLowgrVlJV3SPJLyc5trtPq6otSf5pd79iyaOxgqrqsiRbk5yYZHuStyX5ju5+zDLnAg49woqVVFXvTPKqJD/X3febbr3wl939nUsejRVUVe/r7gdW1bOSfLm7f9PF66ylqn5jjdWfS7Kju992sOdh43FxJ6vq6O5+Y5Kbk6S7b4wLTNm3r1XV2Ul+PLPfJE2Sw5c4D6vr9knun+TD08cpSY5L8tSq+s/LHIyNwcXrrKovVtXdk3SSVNVDM/tXJazlKUmeluR53f3RqjopyWuWPBOr6ZQk/2zPNZxV9dIk/zvJw5J8YJmDsTE4FchKmu6a/ZtJ7pvkg0k2J3lCd79/qYMB61pVXZXk1O7+3LR8lyR/0d33cfqYERyxYlXdO8npSY5P8kNJHhKvV/ahqk5O8itJtmR2qidJ0t3ftrShWFUvTHJZVb0rs98KfHiSX66qOyX5H8scjI3BEStWUlW9v7tPmd6i5Bczu6/V+d39kCWPxgqqqj9N8h+SvDjJ4zI7NXi77j5/qYOxkqabDf9YZvevunNmt+r4X8udio3Cxeusqj0Xqj82ycu7+x1JjljiPKy2O3T3H2X2j8WPd/dzM3vtwDeoqp9MckmS85L828zeV/K5y5yJjUVYsaq8RQm3xler6nZJPlxV51bVD2R2JAL29owkD07y8e7+50kekMSNZBnGX1Ssqh/O7F+Vj57unn23eIsS9u0ZSe6Y5OlJHpTZaZ6fWOpErKqvdPdXkqSqjuzuDyW5z5JnYgNxjRUAh4yq+m+ZXYP3zCTfm+S6JIe7Sz+jCCtg3aqqt2e619lauvuMgzgO60xVfU+SuyT57919w7LnYWMQVsC6Nf3FuE/d/ScHaxaARFgBG8B0D6Ivd/fN0/KmJEd295eWOxlwqHHxOrAR/FFmF6/vcYe42SOwBMIK2Ahu391f2LMwfX7HW9gf4IAQVsBG8MXp/SWTJFW1NcmXlzgPcIjy3mvARvDMJG+qqk9My/fK7OayAAeVI1bARvCBJL+V5KtJdid5WZLLlzoRcEjyW4HAuldVb0zyd0leN6360SR37e4nLm8q4FAkrIB1r6qu6O4t+1sHcKA5FQhsBO+rqofuWaiqhyTZscR5gEOUI1bAuldVV2b2Rrp/Pa361iRXJbkxSXf3KcuaDTi0CCtg3auqE25pe3
d//GDNAhzahBUAwCCusQIAGERYAQAMIqwAAAYRVgAAgwgrAIBB/h+4N9uGTSWx+gAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fe437993a10>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"# Feature names are read from the training frame stored in marvin_dataset (not from the loose\n",
"# X_train / pred_cols globals) so they always match the columns the forest was fitted on.\n",
"feature_names = list(marvin_dataset['X_train'].columns)\n",
"n_features = len(feature_names)\n",
"\n",
"# Importances of the best forest found by the grid search, sorted most important first.\n",
"importances = marvin_model['rf'].best_estimator_.feature_importances_\n",
"indices = np.argsort(importances)[::-1]\n",
"\n",
"# Print the feature ranking\n",
"print(\"Feature ranking:\")\n",
"\n",
"for f in range(n_features):\n",
"    print(\"%d. feature %s (%f)\" % (f + 1, feature_names[indices[f]], importances[indices[f]]))\n",
"\n",
"# Plot the feature importances of the forest\n",
"plt.figure(figsize=(10,5))\n",
"plt.title(\"Feature importances\")\n",
"plt.bar(range(n_features), importances[indices],\n",
"        color=\"r\", align=\"center\")\n",
"stats_order = [feature_names[x] for x in indices]\n",
"plt.xticks(range(n_features), stats_order, rotation='vertical')\n",
"plt.xlim([-1, n_features])\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predictor"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'prediction1': 0.0, 'prediction2': 1.0}"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One passenger per row, columns in training order: age, class, sex\n",
"# (sex uses the encoding applied during feature engineering: male=1, female=0).\n",
"input_message = [[50, 3, 0]]\n",
"\n",
"# Ask each fitted grid search for its prediction on the single row.\n",
"rf_prediction = marvin_model['rf'].predict(input_message)[0]\n",
"svm_prediction = marvin_model['svm'].predict(input_message)[0]\n",
"\n",
"final_result = {\"prediction1\": rf_prediction, \"prediction2\": svm_prediction}\n",
"\n",
"final_result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}