blob: e27cfdad6167b23bf13cb8c6a3a64b4c36b72598 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*
* @file madlib_keras_automl.sql_in
*
* @brief SQL functions for training with AutoML methods
* @date August 2020
*
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_automl
@brief Utility function to set up a model selection table for model architecture search
and hyperparameter tuning.
\warning <em> This MADlib method is still in early stage development.
Interface and implementation are subject to change. </em>
<div class="toc"><b>Contents</b><ul>
<li class="level1"><a href="#hyperband_schedule">Hyperband Schedule</a></li>
<li class="level1"><a href="#example">Examples</a></li>
<li class="level1"><a href="#notes">Notes</a></li>
<li class="level1"><a href="#related">Related Topics</a></li>
</ul></div>
This module sets up the Hyperband schedule of evaluating configurations
for use by the Keras AutoML of MADlib.
By configuration we mean both hyperparameter tuning and
model architecture search. The table defines the unique combinations
of model architectures, compile and fit parameters
to run on a massively parallel processing database cluster.
@anchor hyperband_schedule
@par Hyperband Schedule
<pre class="syntax">
hyperband_schedule(
schedule_table,
R,
eta,
skip_last
)
</pre>
\b Arguments
<dl class="arglist">
<dt>schedule_table</dt>
<dd>VARCHAR. Name of output table containing hyperband schedule.
</dd>
<dt>R</dt>
<dd>INTEGER. Maximum number of resources (iterations) that can be allocated
to a single configuration.
</dd>
<dt>eta</dt>
<dd>INTEGER, default 3. Controls the proportion of configurations discarded in
each round of successive halving. For example, eta=3 keeps the best 1/3 of
the configurations for the next round.
</dd>
<dt>skip_last</dt>
<dd>INTEGER, default 0. Number of trailing rounds to skip in each bracket. For example, skip_last=1 skips the
last round (i.e., the last entry in each bracket), which is standard randomized search and can
be expensive when run for the total R iterations.
</dd>
</dl>
<b>Output table</b>
<br>
The hyperband schedule output table contains the following columns:
<table class="output">
<tr>
<th>s</th>
<td>INTEGER. Bracket number
</td>
</tr>
<tr>
<th>i</th>
<td>INTEGER. Round (depth) in bracket
</td>
</tr>
<tr>
<th>n_i</th>
<td>INTEGER. Number of configurations in this round
</td>
</tr>
<tr>
<th>r_i</th>
<td>INTEGER. Resources (iterations) in this round
</td>
</tr>
</table>
<br>
@anchor example
@par Examples
TBD.
@anchor notes
@par Notes
TBD.
@anchor related
@par Related Topics
TBD.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.hyperband_schedule(
    schedule_table        VARCHAR,
    r                     INTEGER,
    eta                   INTEGER DEFAULT 3,
    skip_last             INTEGER DEFAULT 0
) RETURNS VOID AS $$
    PythonFunctionBodyOnly(`deep_learning', `madlib_keras_automl')
    # Thin SQL entry point: all four arguments are forwarded unchanged to
    # madlib_keras_automl.HyperbandSchedule and load() is then called on the
    # resulting object. Nothing is returned to the SQL caller (RETURNS VOID).
    #
    # NOTE(review): load() presumably creates and fills schedule_table with
    # the s / i / n_i / r_i rows described in the user documentation; its
    # definition is not in this file, so confirm there.
    # NOTE(review): AOControl is defined elsewhere; the False argument
    # presumably switches off append-optimized storage while the schedule
    # table is created. Confirm against its definition.
    # NOTE(review): the top-level indentation used here is assumed to match
    # the lines that the macro call at the start of this body expands to;
    # confirm against SQLCommon.m4.
    #
    # The two statements below must stay indented under the with header:
    # Python rejects a compound statement that has no indented suite.
    with AOControl(False):
        schedule_loader = madlib_keras_automl.HyperbandSchedule(schedule_table, r, eta, skip_last)
        schedule_loader.load()
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');