blob: c485a818e7790cb5508f9e7c977b357b46a758a6 [file] [log] [blame]
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated since IPython 4.0. You should import from traitlets.config instead.\n",
" \"You should import from traitlets.config instead.\", ShimWarning)\n",
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
" warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
]
}
],
"source": [
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# psycopg2 driver, used for the direct connection opened at the end of this cell\n",
"import psycopg2 as p2\n",
"\n",
"# -- %sql magic connection (exactly one target enabled) --\n",
"# active: Greenplum Database 5.x on GCP - via tunnel\n",
"%sql postgresql://gpadmin@localhost:8000/madlib\n",
"# alternative: Greenplum Database 5.x on GCP (PM demo machine) - direct external IP access\n",
"#%sql postgresql://gpadmin@34.67.65.96:5432/madlib\n",
"# alternative: PostgreSQL local\n",
"#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
"\n",
"# -- psycopg2 connection and cursor (same target as the active %sql line) --\n",
"# alternative: PostgreSQL local\n",
"#conn = p2.connect('postgresql://fmcquillan@localhost:5432/madlib')\n",
"conn = p2.connect('postgresql://gpadmin@localhost:8000/madlib')\n",
"cur = conn.cursor()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 rows affected.\n"
]
},
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <th>version</th>\n",
" </tr>\n",
" <tr>\n",
" <td>MADlib version: 1.17-dev, git revision: rel/v1.16-46-g77ee745, cmake configuration time: Thu Nov 14 17:59:26 UTC 2019, build type: release, build system: Linux-3.10.0-957.27.2.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5</td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"[(u'MADlib version: 1.17-dev, git revision: rel/v1.16-46-g77ee745, cmake configuration time: Thu Nov 14 17:59:26 UTC 2019, build type: release, build system: Linux-3.10.0-957.27.2.el7.x86_64, C compiler: gcc 4.8.5, C++ compiler: g++ 4.8.5',)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%sql select madlib.version();\n",
"#%sql select version();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
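"Pretty print the Hyperband run schedule: for each value of s, list the number of configurations (n_i) and the iterations per configuration (r_i) at every round of Successive Halving."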
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"max_iter = 81\n",
"eta = 3\n",
"B = 5*max_iter = 405\n",
" \n",
"s=4\n",
"n_i r_i\n",
"------------\n",
"81 1.0\n",
"27.0 3.0\n",
"9.0 9.0\n",
"3.0 27.0\n",
"1.0 81.0\n",
" \n",
"s=3\n",
"n_i r_i\n",
"------------\n",
"27 3.0\n",
"9.0 9.0\n",
"3.0 27.0\n",
"1.0 81.0\n",
" \n",
"s=2\n",
"n_i r_i\n",
"------------\n",
"9 9.0\n",
"3.0 27.0\n",
"1.0 81.0\n",
" \n",
"s=1\n",
"n_i r_i\n",
"------------\n",
"6 27.0\n",
"2.0 81.0\n",
" \n",
"s=0\n",
"n_i r_i\n",
"------------\n",
"5 81\n",
" \n",
"sum of configurations at leaf nodes across all s = 10.0\n",
"(if have more workers than this, they may not be 100% busy)\n"
]
}
],
"source": [
"import numpy as np\n",
"from math import log, ceil\n",
"\n",
"# --- input ---\n",
"max_iter = 81  # maximum iterations/epochs per configuration\n",
"eta = 3  # defines downsampling rate (default=3)\n",
"\n",
"\n",
"def logeta(x):\n",
"    \"\"\"Return the logarithm of x in base eta (eta is read at call time).\"\"\"\n",
"    return log(x)/log(eta)\n",
"\n",
"\n",
"# s_max = floor(log_eta(max_iter)): number of unique executions of\n",
"# Successive Halving (minus one).  The floating-point quotient can land just\n",
"# below an exact integer when max_iter is an exact power of eta (for example\n",
"# log(1000)/log(10) evaluates slightly below 3), and int() would then drop a\n",
"# whole execution.  The truncated estimate is therefore corrected by\n",
"# comparing powers of eta directly.\n",
"s_max = int(logeta(max_iter))\n",
"while eta**(s_max+1) <= max_iter:\n",
"    s_max += 1\n",
"while s_max > 0 and eta**s_max > max_iter:\n",
"    s_max -= 1\n",
"\n",
"# total number of iterations (without reuse) per execution of Successive Halving (n,r)\n",
"B = (s_max+1)*max_iter\n",
"\n",
"# --- echo input ---\n",
"print (\"max_iter = \" + str(max_iter))\n",
"print (\"eta = \" + str(eta))\n",
"print (\"B = \" + str(s_max+1) + \"*max_iter = \" + str(B))\n",
"\n",
"sum_leaf_n_i = 0  # count configurations at leaf nodes across all s\n",
"\n",
"#### Begin Finite Horizon Hyperband outer loop. Repeat indefinitely.\n",
"for s in reversed(range(s_max+1)):\n",
"\n",
"    print (\" \")\n",
"    print (\"s=\" + str(s))\n",
"    print (\"n_i r_i\")\n",
"    print (\"------------\")\n",
"\n",
"    # int() around the quotient floors it under both integer and true division\n",
"    n = int(ceil(int(B/max_iter/(s+1))*eta**s))  # initial number of configurations\n",
"    r = max_iter*eta**(-s)  # initial number of iterations to run configurations for\n",
"\n",
"    #### Begin Finite Horizon Successive Halving with (n,r)\n",
"    #T = [ get_random_hyperparameter_configuration() for i in range(n) ]\n",
"    for i in range(s+1):\n",
"        # Run each of the n_i configs for r_i iterations and keep best n_i/eta\n",
"        n_i = n*eta**(-i)\n",
"        r_i = r*eta**(i)\n",
"\n",
"        print (str(n_i) + \" \" + str (r_i))\n",
"\n",
"        # the last round (i == s) is the leaf node for this s\n",
"        if i == s:\n",
"            sum_leaf_n_i += n_i\n",
"\n",
"        #val_losses = [ run_then_return_val_loss(num_iters=r_i,hyperparameters=t) for t in T ]\n",
"        #T = [ T[i] for i in argsort(val_losses)[0:int( n_i/eta )] ]\n",
"    #### End Finite Horizon Successive Halving with (n,r)\n",
"\n",
"print (\" \")\n",
"print (\"sum of configurations at leaf nodes across all s = \" + str(sum_leaf_n_i))\n",
"print (\"(if have more workers than this, they may not be 100% busy)\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
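"Pretty print the same schedule traversed along the diagonal: first the initial n and r for each s, then for each round i the values of n_i and r_i for every s from s_max down to s_max - i."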
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"echo input:\n",
"max_iter = 81\n",
"eta = 3\n",
"s_max = 4\n",
"B = 5*max_iter = 405\n",
" \n",
"initial n, r values for each s:\n",
"s=4\n",
"n=81\n",
"r=1.0\n",
" \n",
"s=3\n",
"n=27\n",
"r=3.0\n",
" \n",
"s=2\n",
"n=9\n",
"r=9.0\n",
" \n",
"s=1\n",
"n=6\n",
"r=27.0\n",
" \n",
"s=0\n",
"n=5\n",
"r=81\n",
" \n",
"outer loop on diagonal:\n",
" \n",
"i=0\n",
"inner loop on s desc:\n",
"s=4\n",
"n_i=81\n",
"r_i=1.0\n",
" \n",
"i=1\n",
"inner loop on s desc:\n",
"s=4\n",
"n_i=27.0\n",
"r_i=3.0\n",
"s=3\n",
"n_i=27\n",
"r_i=3.0\n",
" \n",
"i=2\n",
"inner loop on s desc:\n",
"s=4\n",
"n_i=9.0\n",
"r_i=9.0\n",
"s=3\n",
"n_i=9.0\n",
"r_i=9.0\n",
"s=2\n",
"n_i=9\n",
"r_i=9.0\n",
" \n",
"i=3\n",
"inner loop on s desc:\n",
"s=4\n",
"n_i=3.0\n",
"r_i=27.0\n",
"s=3\n",
"n_i=3.0\n",
"r_i=27.0\n",
"s=2\n",
"n_i=3.0\n",
"r_i=27.0\n",
"s=1\n",
"n_i=6\n",
"r_i=27.0\n",
" \n",
"i=4\n",
"inner loop on s desc:\n",
"s=4\n",
"n_i=1.0\n",
"r_i=81.0\n",
"s=3\n",
"n_i=1.0\n",
"r_i=81.0\n",
"s=2\n",
"n_i=1.0\n",
"r_i=81.0\n",
"s=1\n",
"n_i=2.0\n",
"r_i=81.0\n",
"s=0\n",
"n_i=5\n",
"r_i=81\n"
]
}
],
"source": [
"import numpy as np\n",
"from math import log, ceil\n",
"\n",
"# --- input ---\n",
"max_iter = 81  # maximum iterations/epochs per configuration\n",
"eta = 3  # defines downsampling rate (default=3)\n",
"\n",
"\n",
"def logeta(x):\n",
"    \"\"\"Return the logarithm of x in base eta (eta is read at call time).\"\"\"\n",
"    return log(x)/log(eta)\n",
"\n",
"\n",
"# s_max = floor(log_eta(max_iter)): number of unique executions of\n",
"# Successive Halving (minus one).  The floating-point quotient can land just\n",
"# below an exact integer when max_iter is an exact power of eta (for example\n",
"# log(1000)/log(10) evaluates slightly below 3), and int() would then drop a\n",
"# whole execution.  The truncated estimate is therefore corrected by\n",
"# comparing powers of eta directly.\n",
"s_max = int(logeta(max_iter))\n",
"while eta**(s_max+1) <= max_iter:\n",
"    s_max += 1\n",
"while s_max > 0 and eta**s_max > max_iter:\n",
"    s_max -= 1\n",
"\n",
"# total number of iterations (without reuse) per execution of Successive Halving (n,r)\n",
"B = (s_max+1)*max_iter\n",
"\n",
"# --- echo input ---\n",
"print (\"echo input:\")\n",
"print (\"max_iter = \" + str(max_iter))\n",
"print (\"eta = \" + str(eta))\n",
"print (\"s_max = \" + str(s_max))\n",
"print (\"B = \" + str(s_max+1) + \"*max_iter = \" + str(B))\n",
"\n",
"print (\" \")\n",
"print (\"initial n, r values for each s:\")\n",
"initial_n_vals = {}  # s -> initial number of configurations\n",
"initial_r_vals = {}  # s -> initial number of iterations per configuration\n",
"# get hyper parameter configs for each s\n",
"for s in reversed(range(s_max+1)):\n",
"\n",
"    # int() around the quotient floors it under both integer and true division\n",
"    n = int(ceil(int(B/max_iter/(s+1))*eta**s))  # initial number of configurations\n",
"    r = max_iter*eta**(-s)  # initial number of iterations to run configurations for\n",
"\n",
"    initial_n_vals[s] = n\n",
"    initial_r_vals[s] = r\n",
"\n",
"    print (\"s=\" + str(s))\n",
"    print (\"n=\" + str(n))\n",
"    print (\"r=\" + str(r))\n",
"    print (\" \")\n",
"\n",
"print (\"outer loop on diagonal:\")\n",
"# outer loop on diagonal\n",
"for i in range(s_max+1):\n",
"    print (\" \")\n",
"    print (\"i=\" + str(i))\n",
"\n",
"    print (\"inner loop on s desc:\")\n",
"    # inner loop on s desc: s runs from s_max down to s_max-i inclusive\n",
"    for s in range(s_max, s_max-i-1, -1):\n",
"        # scale the initial values for s by eta raised to the offset (i - s_max + s)\n",
"        n_i = initial_n_vals[s]*eta**(-i+s_max-s)\n",
"        r_i = initial_r_vals[s]*eta**(i-s_max+s)\n",
"\n",
"        print (\"s=\" + str(s))\n",
"        print (\"n_i=\" + str(n_i))\n",
"        print (\"r_i=\" + str(r_i))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}