| { |
| "cells": [ |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stderr", |
| "output_type": "stream", |
| "text": [ |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated since IPython 4.0. You should import from traitlets.config instead.\n", |
| " \"You should import from traitlets.config instead.\", ShimWarning)\n", |
| "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n", |
| " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n" |
| ] |
| } |
| ], |
| "source": [ |
| "%load_ext sql" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "u'Connected: fmcquillan@madlib'" |
| ] |
| }, |
| "execution_count": 3, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "# Greenplum Database 5.x on GCP (PM demo machine) - direct external IP access\n", |
| "#%sql postgresql://gpadmin@34.67.65.96:5432/madlib\n", |
| "\n", |
| "# Greenplum Database 5.x on GCP - via tunnel\n", |
| "#%sql postgresql://gpadmin@localhost:8000/madlib\n", |
| " \n", |
| "# PostgreSQL local\n", |
| "%sql postgresql://fmcquillan@localhost:5432/madlib" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 46, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "import numpy as np\n", |
| "\n", |
| "from random import random\n", |
| "from math import log, ceil\n", |
| "from time import time, ctime\n", |
| "\n", |
| "\n", |
| "class Hyperband:\n", |
| "\t\"\"\"Hyperband hyperparameter search (Li et al., JMLR 2018).\n", |
| "\n", |
| "\tget_params_function: returns one random hyperparameter configuration.\n", |
| "\ttry_params_function: takes ( n_iterations, params ) and returns a dict\n", |
| "\twith at least a 'loss' key (optionally 'early_stop').\n", |
| "\t\"\"\"\n", |
| "\n", |
| "\tdef __init__( self, get_params_function, try_params_function ):\n", |
| "\t\tself.get_params = get_params_function\n", |
| "\t\tself.try_params = try_params_function\n", |
| "\n", |
| "\t\tself.max_iter = 27 \t# maximum iterations per configuration\n", |
| "\t\tself.eta = 3\t\t\t# defines configuration downsampling rate (default = 3)\n", |
| "\n", |
| "\t\tself.logeta = lambda x: log( x ) / log( self.eta )\n", |
| "\t\tself.s_max = int( self.logeta( self.max_iter ))\n", |
| "\t\tself.B = ( self.s_max + 1 ) * self.max_iter\n", |
| "\n", |
| "\t\tself.results = []\t# list of dicts\n", |
| "\t\tself.counter = 0\n", |
| "\t\tself.best_loss = np.inf\n", |
| "\t\tself.best_counter = -1\n", |
| "\n", |
| "\n", |
| "\t# can be called multiple times\n", |
| "\tdef run( self, skip_last = 0, dry_run = False ):\n", |
| "\t\t\"\"\"Run the search; returns the accumulated list of result dicts.\"\"\"\n", |
| "\n", |
| "\t\tfor s in reversed( range( self.s_max + 1 )):\n", |
| "\n", |
| "\t\t\tprint( \" \" )\n", |
| "\t\t\tprint( \"s = {}\".format( s ))\n", |
| "\n", |
| "\t\t\t# initial number of configurations\n", |
| "\t\t\t# float() forces true division; under Python 2 the original integer\n", |
| "\t\t\t# division undercounted n (e.g. 9 instead of 12 for s = 2) relative\n", |
| "\t\t\t# to n = ceil( B/R * eta**s / (s+1) ) in the Hyperband paper.\n", |
| "\t\t\tn = int( ceil( float( self.B ) / self.max_iter / ( s + 1 ) * self.eta ** s ))\n", |
| "\n", |
| "\t\t\t# initial number of iterations per config\n", |
| "\t\t\tr = self.max_iter * self.eta ** ( -s )\n", |
| "\n", |
| "\t\t\t# n random configurations\n", |
| "\t\t\tT = [ self.get_params() for i in range( n )]\n", |
| "\n", |
| "\t\t\tfor i in range(( s + 1 ) - int( skip_last )):\t# changed from s + 1\n", |
| "\n", |
| "\t\t\t\t# Run each of the n configs for <iterations>\n", |
| "\t\t\t\t# and keep best (n_configs / eta) configurations\n", |
| "\n", |
| "\t\t\t\tn_configs = n * self.eta ** ( -i )\n", |
| "\t\t\t\tn_iterations = r * self.eta ** ( i )\n", |
| "\n", |
| "\t\t\t\t# print() call form (not the Python-2 print statement) so the\n", |
| "\t\t\t\t# cell runs under both Python 2 and Python 3 kernels\n", |
| "\t\t\t\tprint( \"\\n*** {} configurations x {:.1f} iterations each\".format(\n", |
| "\t\t\t\t\tn_configs, n_iterations ))\n", |
| "\n", |
| "\t\t\t\tval_losses = []\n", |
| "\t\t\t\tearly_stops = []\n", |
| "\n", |
| "\t\t\t\tfor t in T:\n", |
| "\n", |
| "\t\t\t\t\tself.counter += 1\n", |
| "\t\t\t\t\t#print( \"\\n{} | {} | lowest loss so far: {:.4f} (run {})\\n\".format(\n", |
| "\t\t\t\t\t#\tself.counter, ctime(), self.best_loss, self.best_counter ))\n", |
| "\n", |
| "\t\t\t\t\tstart_time = time()\n", |
| "\n", |
| "\t\t\t\t\tif dry_run:\n", |
| "\t\t\t\t\t\tresult = { 'loss': random(), 'log_loss': random(), 'auc': random()}\n", |
| "\t\t\t\t\telse:\n", |
| "\t\t\t\t\t\tresult = self.try_params( n_iterations, t )\t\t# <---\n", |
| "\n", |
| "\t\t\t\t\tassert( type( result ) == dict )\n", |
| "\t\t\t\t\tassert( 'loss' in result )\n", |
| "\n", |
| "\t\t\t\t\tseconds = int( round( time() - start_time ))\n", |
| "\t\t\t\t\t#print( \"\\n{} seconds.\".format( seconds ))\n", |
| "\n", |
| "\t\t\t\t\tloss = result['loss']\n", |
| "\t\t\t\t\tval_losses.append( loss )\n", |
| "\n", |
| "\t\t\t\t\tearly_stop = result.get( 'early_stop', False )\n", |
| "\t\t\t\t\tearly_stops.append( early_stop )\n", |
| "\n", |
| "\t\t\t\t\t# keeping track of the best result so far (for display only)\n", |
| "\t\t\t\t\t# could do it by checking results each time, but hey\n", |
| "\t\t\t\t\tif loss < self.best_loss:\n", |
| "\t\t\t\t\t\tself.best_loss = loss\n", |
| "\t\t\t\t\t\tself.best_counter = self.counter\n", |
| "\n", |
| "\t\t\t\t\tresult['counter'] = self.counter\n", |
| "\t\t\t\t\tresult['seconds'] = seconds\n", |
| "\t\t\t\t\tresult['params'] = t\n", |
| "\t\t\t\t\tresult['iterations'] = n_iterations\n", |
| "\n", |
| "\t\t\t\t\tself.results.append( result )\n", |
| "\n", |
| "\t\t\t\t# select a number of best configurations for the next loop\n", |
| "\t\t\t\t# filter out early stops, if any\n", |
| "\t\t\t\tindices = np.argsort( val_losses )\n", |
| "\t\t\t\tT = [ T[i] for i in indices if not early_stops[i]]\n", |
| "\t\t\t\tT = T[ 0:int( n_configs / self.eta )]\n", |
| "\n", |
| "\t\treturn self.results\n" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "def get_params():\n", |
| "    \"\"\"Stub for the demo: returns None as the hyperparameter configuration.\"\"\"\n", |
| "    return" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 7, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "def try_params():\n", |
| "    \"\"\"Stub for the demo; not called when run( dry_run = True ).\n", |
| "\n", |
| "    NOTE(review): a real implementation must accept ( n_iterations, params ),\n", |
| "    since Hyperband.run calls self.try_params( n_iterations, t ) -- this\n", |
| "    zero-argument stub would raise TypeError on a non-dry run.\n", |
| "    \"\"\"\n", |
| "    return" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 47, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| " \n", |
| "('s = ', 3)\n", |
| "\n", |
| "*** 27 configurations x 1.0 iterations each\n", |
| "\n", |
| "*** 9.0 configurations x 3.0 iterations each\n", |
| "\n", |
| "*** 3.0 configurations x 9.0 iterations each\n", |
| "\n", |
| "*** 1.0 configurations x 27.0 iterations each\n", |
| " \n", |
| "('s = ', 2)\n", |
| "\n", |
| "*** 9 configurations x 3.0 iterations each\n", |
| "\n", |
| "*** 3.0 configurations x 9.0 iterations each\n", |
| "\n", |
| "*** 1.0 configurations x 27.0 iterations each\n", |
| " \n", |
| "('s = ', 1)\n", |
| "\n", |
| "*** 6 configurations x 9.0 iterations each\n", |
| "\n", |
| "*** 2.0 configurations x 27.0 iterations each\n", |
| " \n", |
| "('s = ', 0)\n", |
| "\n", |
| "*** 4 configurations x 27.0 iterations each\n" |
| ] |
| } |
| ], |
| "source": [ |
| "#!/usr/bin/env python\n", |
| "\n", |
| "\"bare-bones demonstration of using hyperband to tune sklearn GBT\"\n", |
| "\n", |
| "#from hyperband import Hyperband\n", |
| "#from defs.gb import get_params, try_params\n", |
| "\n", |
| "hb = Hyperband( get_params, try_params )\n", |
| "\n", |
| "# no actual tuning, doesn't call try_params()\n", |
| "results = hb.run( dry_run = True )\n", |
| "\n", |
| "#results = hb.run( skip_last = 1 ) # shorter run\n", |
| "#results = hb.run()" |
| ] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 2", |
| "language": "python", |
| "name": "python2" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.10" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 2 |
| } |