src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras_automl.py_in - madlib - Git at Google

 # coding=utf-8
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 m4_changequote(`<!', `!>')
 m4_ifdef(<!__POSTGRESQL__!>, <!print 'skipping automl for postgres'!>, <!

 import sys
 from os import path
 import math
 sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
 sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))

 from tensorflow import keras # still needed here even though not explicitly used. DO NOT REMOVE.
 import unittest
 from mock import *
 import plpy_mock as plpy

 class HyperbandScheduleTestCase(unittest.TestCase):
     """Size and argument-validation tests for HyperbandSchedule.

     Every test builds the schedule from the four fixture attributes
     (schedule_table, R, eta, skip_last). It then either compares
     len(schedule_vals) against a closed-form count or expects a
     plpy.PLPYException for invalid arguments.
     """

     def setUp(self):
         """Patch sys.modules, import the module under test, set defaults."""
         # The side effects of this class(writing to the output table) are not
         # tested here. They are tested in dev-check.
         self.plpy_mock = Mock(spec='error')
         patches = {
             'plpy': plpy,
             'utilities.mean_std_dev_calculator': Mock()
         }

         self.plpy_mock_execute = MagicMock()
         plpy.execute = self.plpy_mock_execute

         self.module_patcher = patch.dict('sys.modules', patches)
         self.module_patcher.start()
         # The import happens after sys.modules is patched, presumably so
         # that the module under test picks up the mocked plpy.
         import deep_learning.madlib_keras_automl_hyperband
         self.module = deep_learning.madlib_keras_automl_hyperband

         self.subject = self.module.HyperbandSchedule
         self.schedule_table = 'schedule_table'
         self.R = 81
         self.eta = 3
         self.skip_last = 0

     def tearDown(self):
         """Undo the sys.modules patch installed by setUp."""
         self.module_patcher.stop()

     # ------------------------------------------------------------------
     # Helpers (names do not start with "test", so they are not collected)
     # ------------------------------------------------------------------

     def _build_schedule(self):
         """Instantiate HyperbandSchedule from the current fixture values."""
         return self.subject(
             self.schedule_table,
             self.R,
             self.eta,
             self.skip_last
         )

     def _s_max(self):
         """Return int(floor(log(R, eta))) for the current R and eta."""
         return int(math.floor(math.log(self.R, self.eta)))

     def _full_schedule_size(self):
         """Return (s_max+1)*(s_max+2)/2, the count expected for skip_last=0."""
         s_max = self._s_max()
         return int((s_max + 1) * (s_max + 2) / 2.0)

     def _assert_schedule_size(self, expected):
         """Build the schedule and check the length of its schedule_vals."""
         self.assertEqual(expected, len(self._build_schedule().schedule_vals))

     def _assert_rejected(self):
         """Expect the constructor to raise plpy.PLPYException."""
         with self.assertRaises(plpy.PLPYException):
             self._build_schedule()

     # ------------------------------------------------------------------
     # Tests
     # ------------------------------------------------------------------

     def test_schedule_table_dimension(self):
         self._assert_schedule_size(self._full_schedule_size())

     def test_max_skip_last(self):
         self.skip_last = self._s_max() + 1  # s_max+1
         self._assert_rejected()

     def test_negative_skip_last(self):
         self.skip_last = -3
         self._assert_rejected()

     def test_zero_resources(self):
         self.R = 0
         self._assert_rejected()

     def test_negative_resources(self):
         self.R = -3
         self._assert_rejected()

     def test_non_discarding_eta(self):
         self.eta = 1
         self._assert_rejected()

     def test_negative_eta(self):
         self.eta = -2
         self._assert_rejected()

     def test_different_R(self):
         # A power of eta and two values that are not powers of eta.
         for resources in (27, 13, 100):
             self.R = resources
             self._assert_schedule_size(self._full_schedule_size())

     def test_different_eta(self):
         self.eta = 4
         self._assert_schedule_size(self._full_schedule_size())

         self.R = 91
         self.eta = 6
         self._assert_schedule_size(self._full_schedule_size())

     def test_different_skip_last(self):
         # R and eta stay at their defaults, so both values are computed once.
         s_max = self._s_max()
         full_size = self._full_schedule_size()

         self.skip_last = 2
         self._assert_schedule_size(full_size - (2 * (s_max + 1) - 1))

         self.skip_last = 3
         self._assert_schedule_size(full_size - (3 * (s_max + 1) - 3))

 class AutoMLHyperoptTestCase(unittest.TestCase):
     """Tests for AutoMLHyperopt.get_configs_list and get_num_segments.

     The real constructor is bypassed through a subclass whose __init__ does
     nothing, so only these two methods are exercised.
     """

     def setUp(self):
         """Patch sys.modules, import the module under test, build the fake."""
         # The side effects of this class(writing to the output table) are not
         # tested here. They are tested in dev-check.
         self.plpy_mock = Mock(spec='error')
         patches = {
             'plpy': plpy,
             'utilities.mean_std_dev_calculator': Mock()
         }

         self.plpy_mock_execute = MagicMock()
         plpy.execute = self.plpy_mock_execute

         self.module_patcher = patch.dict('sys.modules', patches)
         self.module_patcher.start()
         import deep_learning.madlib_keras_automl_hyperopt
         self.module = deep_learning.madlib_keras_automl_hyperopt

         # Swap the module-level get_seg_number for a mock so that each test
         # can choose its return value.
         self.seg_num_mock = Mock()
         self.module.get_seg_number = self.seg_num_mock

         class FakeAutoMLHyperopt(self.module.AutoMLHyperopt):
             """AutoMLHyperopt with a no-op constructor."""

             def __init__(self, *args):
                 pass

         self.subject = FakeAutoMLHyperopt

     def tearDown(self):
         """Undo the sys.modules patch installed by setUp."""
         self.module_patcher.stop()

     def test_get_configs_list_models_less_than_segments(self):
         automl_hyperopt = self.subject()
         configs = automl_hyperopt.get_configs_list(1, 3)
         self.assertEqual([(1, 1)], configs)

     def test_get_configs_list_models_equal_segments(self):
         automl_hyperopt = self.subject()
         configs = automl_hyperopt.get_configs_list(3, 3)
         self.assertEqual([(1, 3)], configs)

     def test_get_configs_list_last_bucket_models_less_than_half_segments(self):
         automl_hyperopt = self.subject()
         # Last bucket num models < 1/2 num workers: the expected result has
         # the leftover model folded into the preceding bucket.
         configs = automl_hyperopt.get_configs_list(81, 20)
         self.assertEqual([(1, 20), (21, 40), (41, 60), (61, 81)], configs)

     def test_get_configs_list_last_bucket_models_greater_than_half_segments(self):
         automl_hyperopt = self.subject()
         # Last bucket num models > 1/2 num workers: the expected result
         # keeps the leftover models as their own bucket.
         configs = automl_hyperopt.get_configs_list(20, 3)
         self.assertEqual(
             [(1, 3), (4, 6), (7, 9), (10, 12), (13, 15), (16, 18), (19, 20)],
             configs)

     def test_get_configs_list_last_bucket_models_equal_half_segments(self):
         automl_hyperopt = self.subject()
         # Last bucket num models = 1/2 num workers: the expected result
         # keeps the leftover models as their own bucket.
         configs = automl_hyperopt.get_configs_list(90, 20)
         self.assertEqual(
             [(1, 20), (21, 40), (41, 60), (61, 80), (81, 90)], configs)

     def test_get_num_segments_all_segments(self):
         automl_hyperopt = self.subject()
         automl_hyperopt.source_table = 'dummy_table'
         self.plpy_mock_execute.return_value = [{'distribution_rules': 'all_segments'}]
         self.seg_num_mock.return_value = 3
         self.assertEqual(3, automl_hyperopt.get_num_segments())

     def test_get_num_segments_array_value(self):
         automl_hyperopt = self.subject()
         automl_hyperopt.source_table = 'dummy_table'
         # return list of segment ids as distribution_rules
         self.plpy_mock_execute.return_value = [{'distribution_rules': [3, 1]}]
         self.assertEqual(2, automl_hyperopt.get_num_segments())

 # Run the test cases defined in this file when it is executed as a script.
 if __name__ == '__main__':
     unittest.main()

 !>)
	# coding=utf-8
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	m4_changequote(`<!', `!>')
	m4_ifdef(<!__POSTGRESQL__!>, <!print 'skipping automl for postgres'!>, <!

	import sys
	from os import path
	import math
	sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
	sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))

	from tensorflow import keras # still needed here even though not explicitly used. DO NOT REMOVE.
	import unittest
	from mock import *
	import plpy_mock as plpy

	class HyperbandScheduleTestCase(unittest.TestCase):
	    """Size and argument-validation tests for HyperbandSchedule.

	    Every test builds the schedule from the four fixture attributes
	    (schedule_table, R, eta, skip_last). It then either compares
	    len(schedule_vals) against a closed-form count or expects a
	    plpy.PLPYException for invalid arguments.
	    """

	    def setUp(self):
	        """Patch sys.modules, import the module under test, set defaults."""
	        # The side effects of this class(writing to the output table) are not
	        # tested here. They are tested in dev-check.
	        self.plpy_mock = Mock(spec='error')
	        patches = {
	            'plpy': plpy,
	            'utilities.mean_std_dev_calculator': Mock()
	        }

	        self.plpy_mock_execute = MagicMock()
	        plpy.execute = self.plpy_mock_execute

	        self.module_patcher = patch.dict('sys.modules', patches)
	        self.module_patcher.start()
	        # The import happens after sys.modules is patched, presumably so
	        # that the module under test picks up the mocked plpy.
	        import deep_learning.madlib_keras_automl_hyperband
	        self.module = deep_learning.madlib_keras_automl_hyperband

	        self.subject = self.module.HyperbandSchedule
	        self.schedule_table = 'schedule_table'
	        self.R = 81
	        self.eta = 3
	        self.skip_last = 0

	    def tearDown(self):
	        """Undo the sys.modules patch installed by setUp."""
	        self.module_patcher.stop()

	    # ------------------------------------------------------------------
	    # Helpers (names do not start with "test", so they are not collected)
	    # ------------------------------------------------------------------

	    def _build_schedule(self):
	        """Instantiate HyperbandSchedule from the current fixture values."""
	        return self.subject(
	            self.schedule_table,
	            self.R,
	            self.eta,
	            self.skip_last
	        )

	    def _s_max(self):
	        """Return int(floor(log(R, eta))) for the current R and eta."""
	        return int(math.floor(math.log(self.R, self.eta)))

	    def _full_schedule_size(self):
	        """Return (s_max+1)*(s_max+2)/2, the count expected for skip_last=0."""
	        s_max = self._s_max()
	        return int((s_max + 1) * (s_max + 2) / 2.0)

	    def _assert_schedule_size(self, expected):
	        """Build the schedule and check the length of its schedule_vals."""
	        self.assertEqual(expected, len(self._build_schedule().schedule_vals))

	    def _assert_rejected(self):
	        """Expect the constructor to raise plpy.PLPYException."""
	        with self.assertRaises(plpy.PLPYException):
	            self._build_schedule()

	    # ------------------------------------------------------------------
	    # Tests
	    # ------------------------------------------------------------------

	    def test_schedule_table_dimension(self):
	        self._assert_schedule_size(self._full_schedule_size())

	    def test_max_skip_last(self):
	        self.skip_last = self._s_max() + 1  # s_max+1
	        self._assert_rejected()

	    def test_negative_skip_last(self):
	        self.skip_last = -3
	        self._assert_rejected()

	    def test_zero_resources(self):
	        self.R = 0
	        self._assert_rejected()

	    def test_negative_resources(self):
	        self.R = -3
	        self._assert_rejected()

	    def test_non_discarding_eta(self):
	        self.eta = 1
	        self._assert_rejected()

	    def test_negative_eta(self):
	        self.eta = -2
	        self._assert_rejected()

	    def test_different_R(self):
	        # A power of eta and two values that are not powers of eta.
	        for resources in (27, 13, 100):
	            self.R = resources
	            self._assert_schedule_size(self._full_schedule_size())

	    def test_different_eta(self):
	        self.eta = 4
	        self._assert_schedule_size(self._full_schedule_size())

	        self.R = 91
	        self.eta = 6
	        self._assert_schedule_size(self._full_schedule_size())

	    def test_different_skip_last(self):
	        # R and eta stay at their defaults, so both values are computed once.
	        s_max = self._s_max()
	        full_size = self._full_schedule_size()

	        self.skip_last = 2
	        self._assert_schedule_size(full_size - (2 * (s_max + 1) - 1))

	        self.skip_last = 3
	        self._assert_schedule_size(full_size - (3 * (s_max + 1) - 3))

	class AutoMLHyperoptTestCase(unittest.TestCase):
	    """Tests for AutoMLHyperopt.get_configs_list and get_num_segments.

	    The real constructor is bypassed through a subclass whose __init__ does
	    nothing, so only these two methods are exercised.
	    """

	    def setUp(self):
	        """Patch sys.modules, import the module under test, build the fake."""
	        # The side effects of this class(writing to the output table) are not
	        # tested here. They are tested in dev-check.
	        self.plpy_mock = Mock(spec='error')
	        patches = {
	            'plpy': plpy,
	            'utilities.mean_std_dev_calculator': Mock()
	        }

	        self.plpy_mock_execute = MagicMock()
	        plpy.execute = self.plpy_mock_execute

	        self.module_patcher = patch.dict('sys.modules', patches)
	        self.module_patcher.start()
	        import deep_learning.madlib_keras_automl_hyperopt
	        self.module = deep_learning.madlib_keras_automl_hyperopt

	        # Swap the module-level get_seg_number for a mock so that each test
	        # can choose its return value.
	        self.seg_num_mock = Mock()
	        self.module.get_seg_number = self.seg_num_mock

	        class FakeAutoMLHyperopt(self.module.AutoMLHyperopt):
	            """AutoMLHyperopt with a no-op constructor."""

	            def __init__(self, *args):
	                pass

	        self.subject = FakeAutoMLHyperopt

	    def tearDown(self):
	        """Undo the sys.modules patch installed by setUp."""
	        self.module_patcher.stop()

	    def test_get_configs_list_models_less_than_segments(self):
	        automl_hyperopt = self.subject()
	        configs = automl_hyperopt.get_configs_list(1, 3)
	        self.assertEqual([(1, 1)], configs)

	    def test_get_configs_list_models_equal_segments(self):
	        automl_hyperopt = self.subject()
	        configs = automl_hyperopt.get_configs_list(3, 3)
	        self.assertEqual([(1, 3)], configs)

	    def test_get_configs_list_last_bucket_models_less_than_half_segments(self):
	        automl_hyperopt = self.subject()
	        # Last bucket num models < 1/2 num workers: the expected result has
	        # the leftover model folded into the preceding bucket.
	        configs = automl_hyperopt.get_configs_list(81, 20)
	        self.assertEqual([(1, 20), (21, 40), (41, 60), (61, 81)], configs)

	    def test_get_configs_list_last_bucket_models_greater_than_half_segments(self):
	        automl_hyperopt = self.subject()
	        # Last bucket num models > 1/2 num workers: the expected result
	        # keeps the leftover models as their own bucket.
	        configs = automl_hyperopt.get_configs_list(20, 3)
	        self.assertEqual(
	            [(1, 3), (4, 6), (7, 9), (10, 12), (13, 15), (16, 18), (19, 20)],
	            configs)

	    def test_get_configs_list_last_bucket_models_equal_half_segments(self):
	        automl_hyperopt = self.subject()
	        # Last bucket num models = 1/2 num workers: the expected result
	        # keeps the leftover models as their own bucket.
	        configs = automl_hyperopt.get_configs_list(90, 20)
	        self.assertEqual(
	            [(1, 20), (21, 40), (41, 60), (61, 80), (81, 90)], configs)

	    def test_get_num_segments_all_segments(self):
	        automl_hyperopt = self.subject()
	        automl_hyperopt.source_table = 'dummy_table'
	        self.plpy_mock_execute.return_value = [{'distribution_rules': 'all_segments'}]
	        self.seg_num_mock.return_value = 3
	        self.assertEqual(3, automl_hyperopt.get_num_segments())

	    def test_get_num_segments_array_value(self):
	        automl_hyperopt = self.subject()
	        automl_hyperopt.source_table = 'dummy_table'
	        # return list of segment ids as distribution_rules
	        self.plpy_mock_execute.return_value = [{'distribution_rules': [3, 1]}]
	        self.assertEqual(2, automl_hyperopt.get_num_segments())

	# Run the test cases defined in this file when it is executed as a script.
	if __name__ == '__main__':
	    unittest.main()

	!>)