| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Iris H2O Example" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Acquisitor and Cleaner" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": { |
| "marvin_cell": "acquisitor" |
| }, |
| "outputs": [], |
| "source": [ |
| "import pandas as pd\n", |
| "from marvin_python_toolbox.common.data import MarvinData\n", |
| "\n", |
| "# Fetch the Iris CSV through MarvinData, then load it into the DataFrame\n", |
| "# exposed to the following cells as marvin_initial_dataset.\n", |
| "iris_csv_url = \"https://s3.amazonaws.com/marvin-engines-data/Iris.csv\"\n", |
| "local_csv = MarvinData.download_file(url=iris_csv_url)\n", |
| "\n", |
| "marvin_initial_dataset = pd.read_csv(local_csv)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Training Preparator" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": { |
| "marvin_cell": "tpreparator" |
| }, |
| "outputs": [], |
| "source": [ |
| "from sklearn.model_selection import train_test_split\n", |
| "from sklearn import model_selection\n", |
| "\n", |
| "# Hold out 30% of the rows with a fixed seed so the split is repeatable.\n", |
| "# Both parts keep every column of the initial dataset.\n", |
| "train_rows, test_rows = train_test_split(\n", |
| "    marvin_initial_dataset,\n", |
| "    test_size=0.3,\n", |
| "    random_state=1,\n", |
| ")\n", |
| "\n", |
| "marvin_dataset = dict(train_X=train_rows, test_X=test_rows)\n" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Trainer" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": { |
| "marvin_cell": "trainer", |
| "scrolled": true |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime:</td>\n", |
| "<td>1 day 0 hours 48 mins</td></tr>\n", |
| "<tr><td>H2O cluster timezone:</td>\n", |
| "<td>America/Sao_Paulo</td></tr>\n", |
| "<tr><td>H2O data parsing timezone:</td>\n", |
| "<td>UTC</td></tr>\n", |
| "<tr><td>H2O cluster version:</td>\n", |
| "<td>3.26.0.3</td></tr>\n", |
| "<tr><td>H2O cluster version age:</td>\n", |
| "<td>20 days </td></tr>\n", |
| "<tr><td>H2O cluster name:</td>\n", |
| "<td>H2O_from_python_fernandozagatti_7kn410</td></tr>\n", |
| "<tr><td>H2O cluster total nodes:</td>\n", |
| "<td>1</td></tr>\n", |
| "<tr><td>H2O cluster free memory:</td>\n", |
| "<td>3.791 Gb</td></tr>\n", |
| "<tr><td>H2O cluster total cores:</td>\n", |
| "<td>4</td></tr>\n", |
| "<tr><td>H2O cluster allowed cores:</td>\n", |
| "<td>4</td></tr>\n", |
| "<tr><td>H2O cluster status:</td>\n", |
| "<td>locked, healthy</td></tr>\n", |
| "<tr><td>H2O connection url:</td>\n", |
| "<td>http://localhost:54321</td></tr>\n", |
| "<tr><td>H2O connection proxy:</td>\n", |
| "<td>None</td></tr>\n", |
| "<tr><td>H2O internal security:</td>\n", |
| "<td>False</td></tr>\n", |
| "<tr><td>H2O API Extensions:</td>\n", |
| "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4</td></tr>\n", |
| "<tr><td>Python version:</td>\n", |
| "<td>3.7.3 final</td></tr></table></div>" |
| ], |
| "text/plain": [ |
| "-------------------------- ---------------------------------------------------\n", |
| "H2O cluster uptime: 1 day 0 hours 48 mins\n", |
| "H2O cluster timezone: America/Sao_Paulo\n", |
| "H2O data parsing timezone: UTC\n", |
| "H2O cluster version: 3.26.0.3\n", |
| "H2O cluster version age: 20 days\n", |
| "H2O cluster name: H2O_from_python_fernandozagatti_7kn410\n", |
| "H2O cluster total nodes: 1\n", |
| "H2O cluster free memory: 3.791 Gb\n", |
| "H2O cluster total cores: 4\n", |
| "H2O cluster allowed cores: 4\n", |
| "H2O cluster status: locked, healthy\n", |
| "H2O connection url: http://localhost:54321\n", |
| "H2O connection proxy:\n", |
| "H2O internal security: False\n", |
| "H2O API Extensions: Amazon S3, XGBoost, Algos, AutoML, Core V3, Core V4\n", |
| "Python version: 3.7.3 final\n", |
| "-------------------------- ---------------------------------------------------" |
| ] |
| }, |
| "metadata": {}, |
| "output_type": "display_data" |
| }, |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Parse progress: |█████████████████████████████████████████████████████████| 100%\n", |
| "Parse progress: |█████████████████████████████████████████████████████████| 100%\n", |
| "AutoML progress: |████████████████████████████████████████████████████████| 100%\n" |
| ] |
| } |
| ], |
| "source": [ |
| "import h2o\n", |
| "from h2o.automl import H2OAutoML\n", |
| "\n", |
| "# Attach to the H2O cluster that will run AutoML.\n", |
| "h2o.init()\n", |
| "\n", |
| "train_X_frame = h2o.H2OFrame.from_python(marvin_dataset['train_X'])\n", |
| "\n", |
| "# Train on exactly the four measurements that the prediction preparator\n", |
| "# sends to the model. Using every non-target column would also pick up any\n", |
| "# extra column in the CSV (e.g. a row identifier), which is not available\n", |
| "# at prediction time.\n", |
| "y = 'Species'\n", |
| "x = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']\n", |
| "\n", |
| "# Fixed seed and model cap keep the AutoML run bounded and repeatable.\n", |
| "automl = H2OAutoML(max_models=20, seed=1)\n", |
| "automl.train(x=x, y=y, training_frame=train_X_frame)\n", |
| "\n", |
| "marvin_model = automl" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Metrics Evaluator" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "metadata": { |
| "marvin_cell": "evaluator", |
| "scrolled": false |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "/home/fernandozagatti/.virtualenvs/iris-h2o-automl-env/lib/python3.7/site-packages/pandas/core/frame.py:4102: SettingWithCopyWarning: \n", |
| "A value is trying to be set on a copy of a slice from a DataFrame\n", |
| "\n", |
| "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", |
| " errors=errors,\n" |
| ] |
| }, |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Parse progress: |█████████████████████████████████████████████████████████| 100%\n", |
| "gbm prediction progress: |████████████████████████████████████████████████| 100%\n" |
| ] |
| } |
| ], |
| "source": [ |
| "import h2o\n", |
| "from sklearn import metrics\n", |
| "\n", |
| "# Separate labels from features WITHOUT mutating marvin_dataset: an in-place\n", |
| "# drop raises SettingWithCopyWarning and makes the cell fail on a second run\n", |
| "# because 'Species' would already be gone.\n", |
| "test_rows = marvin_dataset['test_X']\n", |
| "y_test = test_rows['Species']\n", |
| "test_features = test_rows.drop(columns='Species')\n", |
| "\n", |
| "# Score the held-out rows and compare predicted labels with the true ones.\n", |
| "test_frame = h2o.H2OFrame.from_python(test_features)\n", |
| "preds = marvin_model.predict(test_frame).as_data_frame()['predict'].values\n", |
| "marvin_metrics = metrics.accuracy_score(y_test, preds)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Prediction Preparator" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 11, |
| "metadata": { |
| "marvin_cell": "ppreparator", |
| "scrolled": true |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Parse progress: |█████████████████████████████████████████████████████████| 100%\n" |
| ] |
| } |
| ], |
| "source": [ |
| "import h2o\n", |
| "import pandas as pd\n", |
| "\n", |
| "# Map the positional input_message values onto named feature columns\n", |
| "# (one single-row column each), then hand the row to H2O as a frame.\n", |
| "feature_names = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']\n", |
| "single_row = {name: [input_message[position]] for position, name in enumerate(feature_names)}\n", |
| "input_message = h2o.H2OFrame.from_python(pd.DataFrame(data=single_row))" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Predictor" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 12, |
| "metadata": { |
| "marvin_cell": "predictor" |
| }, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "gbm prediction progress: |████████████████████████████████████████████████| 100%\n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Score the prepared frame and keep the first cell of the first result row.\n", |
| "prediction_table = marvin_model.predict(input_message).as_data_frame()\n", |
| "final_prediction = prediction_table.values[0][0]" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 13, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Iris-versicolor\n" |
| ] |
| } |
| ], |
| "source": [ |
| "# Show the value assigned to final_prediction by the predictor cell.\n", |
| "print(final_prediction)" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 3", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.7.3" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |