# src/test/scripts/functions/builtin/HyperbandLM.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # Squared-error loss of the linear model B on the data (X, y).
 #
 # X: feature matrix
 # y: response matrix
 # B: model weights, multiplied as X %*% B
 #
 # Returns the sum of squared residuals (y - X %*% B), wrapped into a matrix
 # via as.matrix. Note: despite the name, no square root is taken.
 l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
   return (Matrix[Double] loss)
 {
   residuals = y - X %*% B;
   loss = as.matrix(sum(residuals^2));
 }

 # Test driver: tunes "reg" and "tol" with hyperband and checks that the
 # resulting model is no worse on the held-out rows than lmCG with defaults.
 #   $1 - input path of the feature matrix X
 #   $2 - input path of the response y
 #   $3 - output path of the boolean result (hyperband loss <= default loss)
 X = read($1);
 y = read($2);

 # size of dataset chosen such that number of maximum iterations influences the
 # performance of candidates
 numTrSamples = 100;
 numValSamples = 100;

 # Contiguous, non-overlapping row ranges: train | validation | test.
 # The validation range ends at trainEnd + numValSamples so that it holds
 # exactly numValSamples rows; the test range starts on the following row.
 trainEnd = numTrSamples;
 valEnd = trainEnd + numValSamples;

 X_train = X[1:trainEnd,];
 y_train = y[1:trainEnd,];
 X_val = X[(trainEnd+1):valEnd,];
 y_val = y[(trainEnd+1):valEnd,];
 X_test = X[(valEnd+1):nrow(X),];
 y_test = y[(valEnd+1):nrow(X),];

 params = list("reg", "tol");

 # only works with continuous hyper parameters in this implementation
 # One row per entry of params; column 1 is the lower bound and column 2 the
 # upper bound of the range (layout per the hyperband builtin documentation).
 paramRanges = matrix(0, rows=2, cols=2);

 # reg in [0, 20]
 paramRanges[1,1] = 0;
 paramRanges[1,2] = 20;
 # tol in [10^-12, 10^-10]; lower bound first, consistent with the reg row
 paramRanges[2,1] = 10^-12;
 paramRanges[2,2] = 10^-10;

 # use lmCG, because this implementation of hyperband only makes sense with
 # iterative algorithms
 [B1, optHyperParams] = hyperband(X_train=X_train, y_train=y_train, X_val=X_val,
   y_val=y_val, params=params, paramRanges=paramRanges, R=50, eta=3, verbose=TRUE);

 # train reference with default values
 B2 = lmCG(X=X_train, y=y_train, verbose=FALSE);

 # compare both models on the rows not used for training or validation
 l1 = l2norm(X_test, y_test, B1);
 l2 = l2norm(X_test, y_test, B2);
 R = as.scalar(l1 <= l2);

 write(R, $3);
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# Squared-error loss of the linear model B on the data (X, y).
	#
	# X: feature matrix
	# y: response matrix
	# B: model weights, multiplied as X %*% B
	#
	# Returns the sum of squared residuals (y - X %*% B), wrapped into a matrix
	# via as.matrix. Note: despite the name, no square root is taken.
	l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
	  return (Matrix[Double] loss)
	{
	  residuals = y - X %*% B;
	  loss = as.matrix(sum(residuals^2));
	}

	# Test driver: tunes "reg" and "tol" with hyperband and checks that the
	# resulting model is no worse on the held-out rows than lmCG with defaults.
	#   $1 - input path of the feature matrix X
	#   $2 - input path of the response y
	#   $3 - output path of the boolean result (hyperband loss <= default loss)
	X = read($1);
	y = read($2);

	# size of dataset chosen such that number of maximum iterations influences the
	# performance of candidates
	numTrSamples = 100;
	numValSamples = 100;

	# Contiguous, non-overlapping row ranges: train | validation | test.
	# The validation range ends at trainEnd + numValSamples so that it holds
	# exactly numValSamples rows; the test range starts on the following row.
	trainEnd = numTrSamples;
	valEnd = trainEnd + numValSamples;

	X_train = X[1:trainEnd,];
	y_train = y[1:trainEnd,];
	X_val = X[(trainEnd+1):valEnd,];
	y_val = y[(trainEnd+1):valEnd,];
	X_test = X[(valEnd+1):nrow(X),];
	y_test = y[(valEnd+1):nrow(X),];

	params = list("reg", "tol");

	# only works with continuous hyper parameters in this implementation
	# One row per entry of params; column 1 is the lower bound and column 2 the
	# upper bound of the range (layout per the hyperband builtin documentation).
	paramRanges = matrix(0, rows=2, cols=2);

	# reg in [0, 20]
	paramRanges[1,1] = 0;
	paramRanges[1,2] = 20;
	# tol in [10^-12, 10^-10]; lower bound first, consistent with the reg row
	paramRanges[2,1] = 10^-12;
	paramRanges[2,2] = 10^-10;

	# use lmCG, because this implementation of hyperband only makes sense with
	# iterative algorithms
	[B1, optHyperParams] = hyperband(X_train=X_train, y_train=y_train, X_val=X_val,
	  y_val=y_val, params=params, paramRanges=paramRanges, R=50, eta=3, verbose=TRUE);

	# train reference with default values
	B2 = lmCG(X=X_train, y=y_train, verbose=FALSE);

	# compare both models on the rows not used for training or validation
	l1 = l2norm(X_test, y_test, B1);
	l2 = l2norm(X_test, y_test, B2);
	R = as.scalar(l1 <= l2);

	write(R, $3);