| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": { |
| "scrolled": true |
| }, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated since IPython 4.0. You should import from traitlets.config instead.\n", |
| " \"You should import from traitlets.config instead.\", ShimWarning)\n", |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n", |
| " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%load_ext sql" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Database connections used by this notebook.\n", |
| "# Alternative targets are kept as commented-out URLs; the uncommented %sql line\n", |
| "# selects the database that the %sql magic connects to.\n", |
| "\n", |
| "# Greenplum Database 5.x on GCP (PM demo machine) - direct external IP access\n", |
| "#%sql postgresql://gpadmin@34.67.65.96:5432/madlib\n", |
| "\n", |
| "# Greenplum Database 5.x on GCP - via tunnel\n", |
| "%sql postgresql://gpadmin@localhost:8000/madlib\n", |
| " \n", |
| "# PostgreSQL local\n", |
| "#%sql postgresql://fmcquillan@localhost:5432/madlib\n", |
| "\n", |
| "# psycopg2 connection, opened separately from the %sql magic.\n", |
| "# The URL passed to p2.connect mirrors the active %sql URL above; switch both together\n", |
| "# when changing the target database.\n", |
| "# NOTE(review): conn/cur are not used by any cell visible in this notebook - confirm they are still needed.\n", |
| "import psycopg2 as p2\n", |
| "#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')\n", |
| "conn = p2.connect('postgresql://gpadmin@localhost:8000/madlib')\n", |
| "cur = conn.cursor()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "1 rows affected.\n" |
| ] |
| }, |
| { |
| "data": { |
| "text/html": [ |
| "<table>\n", |
| " <tr>\n", |
| " <th>version</th>\n", |
| " </tr>\n", |
| " <tr>\n", |
| " <td>MADlib version: 1.17-dev, git revision: rel/v1.16-46-g77ee745, cmake configuration time: Thu Nov 14 17:59:26 UTC 2019, build type: release, build system: Linux-3.10.0-957.27.2.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5</td>\n", |
| " </tr>\n", |
| "</table>" |
| ], |
| "text/plain": [ |
| "[(u'MADlib version: 1.17-dev, git revision: rel/v1.16-46-g77ee745, cmake configuration time: Thu Nov 14 17:59:26 UTC 2019, build type: release, build system: Linux-3.10.0-957.27.2.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5',)]" |
| ] |
| }, |
| "execution_count": 3, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Sanity check: ask the connected database for its MADlib build information.\n", |
| "%sql select madlib.version();\n", |
| "# Disabled alternative: query the database's own version() instead.\n", |
| "#%sql select version();" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
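| "## Pretty-print the Hyperband run schedule\n", |
| "\n", |
| "For each bracket `s`, print the number of configurations `n_i` and the iterations per configuration `r_i` at every round of successive halving." |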
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 71, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "max_iter = 81\n", |
| "eta = 3\n", |
| "B = 5*max_iter = 405\n", |
| " \n", |
| "s=4\n", |
| "n_i r_i\n", |
| "------------\n", |
| "81 1.0\n", |
| "27.0 3.0\n", |
| "9.0 9.0\n", |
| "3.0 27.0\n", |
| "1.0 81.0\n", |
| " \n", |
| "s=3\n", |
| "n_i r_i\n", |
| "------------\n", |
| "27 3.0\n", |
| "9.0 9.0\n", |
| "3.0 27.0\n", |
| "1.0 81.0\n", |
| " \n", |
| "s=2\n", |
| "n_i r_i\n", |
| "------------\n", |
| "9 9.0\n", |
| "3.0 27.0\n", |
| "1.0 81.0\n", |
| " \n", |
| "s=1\n", |
| "n_i r_i\n", |
| "------------\n", |
| "6 27.0\n", |
| "2.0 81.0\n", |
| " \n", |
| "s=0\n", |
| "n_i r_i\n", |
| "------------\n", |
| "5 81\n", |
| " \n", |
| "sum of configurations at leaf nodes across all s = 10.0\n", |
| "(if have more workers than this, they may not be 100% busy)\n" |
| ] |
| } |
| ], |
| "source": [ |
| "import numpy as np\n", |
| "from math import log, ceil\n", |
| "\n", |
| "# Print the Hyperband schedule: for every bracket s, the number of configurations (n_i)\n", |
| "# and the iterations per configuration (r_i) at each round of successive halving.\n", |
| "\n", |
| "#input\n", |
| "max_iter = 81 # maximum iterations/epochs per configuration\n", |
| "eta = 3 # defines downsampling rate (default=3)\n", |
| "\n", |
| "# The s_max loop below would never terminate for eta <= 1, so reject bad inputs up front.\n", |
| "if eta <= 1 or max_iter < 1:\n", |
| "    raise ValueError(\"need eta > 1 and max_iter >= 1\")\n", |
| "\n", |
| "# s_max = floor(log_eta(max_iter)), computed with exact integer arithmetic.\n", |
| "# int(log(max_iter)/log(eta)) truncates a float quotient, which can land just below the\n", |
| "# true integer when max_iter is an exact power of eta (max_iter=243, eta=3 evaluates to\n", |
| "# 4.999999999999999 and truncates to 4), silently dropping one bracket.\n", |
| "s_max = 0 # number of unique executions of Successive Halving (minus one)\n", |
| "while eta**(s_max+1) <= max_iter:\n", |
| "    s_max += 1\n", |
| "B = (s_max+1)*max_iter # total number of iterations (without reuse) per execution of Successive Halving (n,r)\n", |
| "\n", |
| "#echo output\n", |
| "print (\"max_iter = \" + str(max_iter))\n", |
| "print (\"eta = \" + str(eta))\n", |
| "print (\"B = \" + str(s_max+1) + \"*max_iter = \" + str(B))\n", |
| "\n", |
| "sum_leaf_n_i = 0 # count configurations at leaf nodes across all s\n", |
| "\n", |
| "#### Begin Finite Horizon Hyperband outer loop. Repeat indefinitely.\n", |
| "for s in reversed(range(s_max+1)):\n", |
| "\n", |
| "    print (\" \")\n", |
| "    print (\"s=\" + str(s))\n", |
| "    print (\"n_i r_i\")\n", |
| "    print (\"------------\")\n", |
| "\n", |
| "    n = int(ceil(int(B/max_iter/(s+1))*eta**s)) # initial number of configurations\n", |
| "    r = max_iter*eta**(-s) # initial number of iterations to run configurations for\n", |
| "\n", |
| "    #### Begin Finite Horizon Successive Halving with (n,r)\n", |
| "    #T = [ get_random_hyperparameter_configuration() for i in range(n) ]\n", |
| "    for i in range(s+1):\n", |
| "        # Run each of the n_i configs for r_i iterations and keep best n_i/eta\n", |
| "        n_i = n*eta**(-i)\n", |
| "        r_i = r*eta**(i)\n", |
| "\n", |
| "        print (str(n_i) + \" \" + str (r_i))\n", |
| "\n", |
| "        # the last round (i == s) is the leaf node for this s\n", |
| "        if i == s:\n", |
| "            sum_leaf_n_i += n_i\n", |
| "\n", |
| "        #val_losses = [ run_then_return_val_loss(num_iters=r_i,hyperparameters=t) for t in T ]\n", |
| "        #T = [ T[i] for i in argsort(val_losses)[0:int( n_i/eta )] ]\n", |
| "    #### End Finite Horizon Successive Halving with (n,r)\n", |
| "\n", |
| "print (\" \")\n", |
| "print (\"sum of configurations at leaf nodes across all s = \" + str(sum_leaf_n_i))\n", |
| "print (\"(if have more workers than this, they may not be 100% busy)\")" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
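| "## Pretty-print the schedule by diagonal\n", |
| "\n", |
| "Regroup the same schedule by round: diagonal `i` lists, for each bracket `s` from `s_max` down to `s_max - i`, the round in which every configuration runs for `r_i = max_iter * eta**(i - s_max)` iterations." |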
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 72, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "echo input:\n", |
| "max_iter = 81\n", |
| "eta = 3\n", |
| "s_max = 4\n", |
| "B = 5*max_iter = 405\n", |
| " \n", |
| "initial n, r values for each s:\n", |
| "s=4\n", |
| "n=81\n", |
| "r=1.0\n", |
| " \n", |
| "s=3\n", |
| "n=27\n", |
| "r=3.0\n", |
| " \n", |
| "s=2\n", |
| "n=9\n", |
| "r=9.0\n", |
| " \n", |
| "s=1\n", |
| "n=6\n", |
| "r=27.0\n", |
| " \n", |
| "s=0\n", |
| "n=5\n", |
| "r=81\n", |
| " \n", |
| "outer loop on diagonal:\n", |
| " \n", |
| "i=0\n", |
| "inner loop on s desc:\n", |
| "s=4\n", |
| "n_i=81\n", |
| "r_i=1.0\n", |
| " \n", |
| "i=1\n", |
| "inner loop on s desc:\n", |
| "s=4\n", |
| "n_i=27.0\n", |
| "r_i=3.0\n", |
| "s=3\n", |
| "n_i=27\n", |
| "r_i=3.0\n", |
| " \n", |
| "i=2\n", |
| "inner loop on s desc:\n", |
| "s=4\n", |
| "n_i=9.0\n", |
| "r_i=9.0\n", |
| "s=3\n", |
| "n_i=9.0\n", |
| "r_i=9.0\n", |
| "s=2\n", |
| "n_i=9\n", |
| "r_i=9.0\n", |
| " \n", |
| "i=3\n", |
| "inner loop on s desc:\n", |
| "s=4\n", |
| "n_i=3.0\n", |
| "r_i=27.0\n", |
| "s=3\n", |
| "n_i=3.0\n", |
| "r_i=27.0\n", |
| "s=2\n", |
| "n_i=3.0\n", |
| "r_i=27.0\n", |
| "s=1\n", |
| "n_i=6\n", |
| "r_i=27.0\n", |
| " \n", |
| "i=4\n", |
| "inner loop on s desc:\n", |
| "s=4\n", |
| "n_i=1.0\n", |
| "r_i=81.0\n", |
| "s=3\n", |
| "n_i=1.0\n", |
| "r_i=81.0\n", |
| "s=2\n", |
| "n_i=1.0\n", |
| "r_i=81.0\n", |
| "s=1\n", |
| "n_i=2.0\n", |
| "r_i=81.0\n", |
| "s=0\n", |
| "n_i=5\n", |
| "r_i=81\n" |
| ] |
| } |
| ], |
| "source": [ |
| "import numpy as np\n", |
| "from math import log, ceil\n", |
| "\n", |
| "# Print the Hyperband schedule grouped by diagonal: first the initial (n, r) of every\n", |
| "# bracket s, then for each step i the brackets s_max..s_max-i with their n_i and r_i.\n", |
| "\n", |
| "#input\n", |
| "max_iter = 81 # maximum iterations/epochs per configuration\n", |
| "eta = 3 # defines downsampling rate (default=3)\n", |
| "\n", |
| "# The s_max loop below would never terminate for eta <= 1, so reject bad inputs up front.\n", |
| "if eta <= 1 or max_iter < 1:\n", |
| "    raise ValueError(\"need eta > 1 and max_iter >= 1\")\n", |
| "\n", |
| "# s_max = floor(log_eta(max_iter)), computed with exact integer arithmetic.\n", |
| "# int(log(max_iter)/log(eta)) truncates a float quotient, which can land just below the\n", |
| "# true integer when max_iter is an exact power of eta (max_iter=243, eta=3 evaluates to\n", |
| "# 4.999999999999999 and truncates to 4), silently dropping one bracket.\n", |
| "s_max = 0 # number of unique executions of Successive Halving (minus one)\n", |
| "while eta**(s_max+1) <= max_iter:\n", |
| "    s_max += 1\n", |
| "B = (s_max+1)*max_iter # total number of iterations (without reuse) per execution of Successive Halving (n,r)\n", |
| "\n", |
| "#echo output\n", |
| "print (\"echo input:\")\n", |
| "print (\"max_iter = \" + str(max_iter))\n", |
| "print (\"eta = \" + str(eta))\n", |
| "print (\"s_max = \" + str(s_max))\n", |
| "print (\"B = \" + str(s_max+1) + \"*max_iter = \" + str(B))\n", |
| "\n", |
| "print (\" \")\n", |
| "print (\"initial n, r values for each s:\")\n", |
| "initial_n_vals = {} # bracket s -> initial number of configurations\n", |
| "initial_r_vals = {} # bracket s -> initial iterations per configuration\n", |
| "# get hyper parameter configs for each s\n", |
| "for s in reversed(range(s_max+1)):\n", |
| "\n", |
| "    n = int(ceil(int(B/max_iter/(s+1))*eta**s)) # initial number of configurations\n", |
| "    r = max_iter*eta**(-s) # initial number of iterations to run configurations for\n", |
| "\n", |
| "    initial_n_vals[s] = n\n", |
| "    initial_r_vals[s] = r\n", |
| "\n", |
| "    print (\"s=\" + str(s))\n", |
| "    print (\"n=\" + str(n))\n", |
| "    print (\"r=\" + str(r))\n", |
| "    print (\" \")\n", |
| "\n", |
| "print (\"outer loop on diagonal:\")\n", |
| "# outer loop on diagonal\n", |
| "for i in range(s_max+1):\n", |
| "    print (\" \")\n", |
| "    print (\"i=\" + str(i))\n", |
| "\n", |
| "    print (\"inner loop on s desc:\")\n", |
| "    # inner loop on s desc: bracket s is in its round number (i - (s_max - s)) here\n", |
| "    for s in range(s_max, s_max-i-1, -1):\n", |
| "        n_i = initial_n_vals[s]*eta**(-i+s_max-s)\n", |
| "        r_i = initial_r_vals[s]*eta**(i-s_max+s)\n", |
| "\n", |
| "        print (\"s=\" + str(s))\n", |
| "        print (\"n_i=\" + str(n_i))\n", |
| "        print (\"r_i=\" + str(r_i))" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 2", |
| "language": "python", |
| "name": "python2" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.10" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |