blob: 4cf72934084e9fa94ac7ff06c9440945cbd7e046 [file] [log] [blame]
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated since IPython 4.0. You should import from traitlets.config instead.\n",
" \"You should import from traitlets.config instead.\", ShimWarning)\n",
"/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
" warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
]
}
],
"source": [
"%load_ext sql"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"u'Connected: fmcquillan@madlib'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Greenplum Database 5.x on GCP (PM demo machine) - direct external IP access\n",
"#%sql postgresql://gpadmin@34.67.65.96:5432/madlib\n",
"\n",
"# Greenplum Database 5.x on GCP - via tunnel\n",
"#%sql postgresql://gpadmin@localhost:8000/madlib\n",
" \n",
"# PostgreSQL local\n",
"%sql postgresql://fmcquillan@localhost:5432/madlib"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"from random import random\n",
"from math import log, ceil\n",
"from time import time, ctime\n",
"\n",
"\n",
"class Hyperband:\n",
"\n",
"\tdef __init__( self, get_params_function, try_params_function ):\n",
"\t\tself.get_params = get_params_function\n",
"\t\tself.try_params = try_params_function\n",
"\n",
"\t\tself.max_iter = 27 \t# maximum iterations per configuration\n",
"\t\tself.eta = 3\t\t\t# defines configuration downsampling rate (default = 3)\n",
"\n",
"\t\tself.logeta = lambda x: log( x ) / log( self.eta )\n",
"\t\tself.s_max = int( self.logeta( self.max_iter ))\n",
"\t\tself.B = ( self.s_max + 1 ) * self.max_iter\n",
"\n",
"\t\tself.results = []\t# list of dicts\n",
"\t\tself.counter = 0\n",
"\t\tself.best_loss = np.inf\n",
"\t\tself.best_counter = -1\n",
"\n",
"\n",
"\t# can be called multiple times\n",
"\tdef run( self, skip_last = 0, dry_run = False ):\n",
"\n",
"\t\tfor s in reversed( range( self.s_max + 1 )):\n",
" \n",
"\t\t\tprint (\" \") \n",
"\t\t\tprint (\"s = \", s)\n",
"\n",
"\t\t\t# initial number of configurations\n",
"\t\t\tn = int( ceil( self.B / self.max_iter / ( s + 1 ) * self.eta ** s ))\n",
"\n",
"\t\t\t# initial number of iterations per config\n",
"\t\t\tr = self.max_iter * self.eta ** ( -s )\n",
"\n",
"\t\t\t# n random configurations\n",
"\t\t\tT = [ self.get_params() for i in range( n )]\n",
"\n",
"\t\t\tfor i in range(( s + 1 ) - int( skip_last )):\t# changed from s + 1\n",
"\n",
"\t\t\t\t# Run each of the n configs for <iterations>\n",
"\t\t\t\t# and keep best (n_configs / eta) configurations\n",
"\n",
"\t\t\t\tn_configs = n * self.eta ** ( -i )\n",
"\t\t\t\tn_iterations = r * self.eta ** ( i )\n",
"\n",
"\t\t\t\tprint \"\\n*** {} configurations x {:.1f} iterations each\".format(\n",
"\t\t\t\t\tn_configs, n_iterations )\n",
"\n",
"\t\t\t\tval_losses = []\n",
"\t\t\t\tearly_stops = []\n",
"\n",
"\t\t\t\tfor t in T:\n",
"\n",
"\t\t\t\t\tself.counter += 1\n",
"\t\t\t\t\t#print \"\\n{} | {} | lowest loss so far: {:.4f} (run {})\\n\".format(\n",
"\t\t\t\t\t#\tself.counter, ctime(), self.best_loss, self.best_counter )\n",
"\n",
"\t\t\t\t\tstart_time = time()\n",
"\n",
"\t\t\t\t\tif dry_run:\n",
"\t\t\t\t\t\tresult = { 'loss': random(), 'log_loss': random(), 'auc': random()}\n",
"\t\t\t\t\telse:\n",
"\t\t\t\t\t\tresult = self.try_params( n_iterations, t )\t\t# <---\n",
"\n",
"\t\t\t\t\tassert( type( result ) == dict )\n",
"\t\t\t\t\tassert( 'loss' in result )\n",
"\n",
"\t\t\t\t\tseconds = int( round( time() - start_time ))\n",
"\t\t\t\t\t#print \"\\n{} seconds.\".format( seconds )\n",
"\n",
"\t\t\t\t\tloss = result['loss']\n",
"\t\t\t\t\tval_losses.append( loss )\n",
"\n",
"\t\t\t\t\tearly_stop = result.get( 'early_stop', False )\n",
"\t\t\t\t\tearly_stops.append( early_stop )\n",
"\n",
"\t\t\t\t\t# keeping track of the best result so far (for display only)\n",
"\t\t\t\t\t# could do it be checking results each time, but hey\n",
"\t\t\t\t\tif loss < self.best_loss:\n",
"\t\t\t\t\t\tself.best_loss = loss\n",
"\t\t\t\t\t\tself.best_counter = self.counter\n",
"\n",
"\t\t\t\t\tresult['counter'] = self.counter\n",
"\t\t\t\t\tresult['seconds'] = seconds\n",
"\t\t\t\t\tresult['params'] = t\n",
"\t\t\t\t\tresult['iterations'] = n_iterations\n",
"\n",
"\t\t\t\t\tself.results.append( result )\n",
"\n",
"\t\t\t\t# select a number of best configurations for the next loop\n",
"\t\t\t\t# filter out early stops, if any\n",
"\t\t\t\tindices = np.argsort( val_losses )\n",
"\t\t\t\tT = [ T[i] for i in indices if not early_stops[i]]\n",
"\t\t\t\tT = T[ 0:int( n_configs / self.eta )]\n",
"\n",
"\t\treturn self.results\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def get_params():\n",
" return"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def try_params():\n",
" return"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" \n",
"('s = ', 3)\n",
"\n",
"*** 27 configurations x 1.0 iterations each\n",
"\n",
"*** 9.0 configurations x 3.0 iterations each\n",
"\n",
"*** 3.0 configurations x 9.0 iterations each\n",
"\n",
"*** 1.0 configurations x 27.0 iterations each\n",
" \n",
"('s = ', 2)\n",
"\n",
"*** 9 configurations x 3.0 iterations each\n",
"\n",
"*** 3.0 configurations x 9.0 iterations each\n",
"\n",
"*** 1.0 configurations x 27.0 iterations each\n",
" \n",
"('s = ', 1)\n",
"\n",
"*** 6 configurations x 9.0 iterations each\n",
"\n",
"*** 2.0 configurations x 27.0 iterations each\n",
" \n",
"('s = ', 0)\n",
"\n",
"*** 4 configurations x 27.0 iterations each\n"
]
}
],
"source": [
"#!/usr/bin/env python\n",
"\n",
"\"bare-bones demonstration of using hyperband to tune sklearn GBT\"\n",
"\n",
"#from hyperband import Hyperband\n",
"#from defs.gb import get_params, try_params\n",
"\n",
"hb = Hyperband( get_params, try_params )\n",
"\n",
"# no actual tuning, doesn't call try_params()\n",
"results = hb.run( dry_run = True )\n",
"\n",
"#results = hb.run( skip_last = 1 ) # shorter run\n",
"#results = hb.run()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}