| # coding=utf-8 |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| m4_changequote(`<!', `!>') |
| |
| import sys |
| import numpy as np |
| import os |
| import re |
| from os import path |
# Add the modules and deep_learning directories to the pythonpath.
| sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))) |
| sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) |
| |
import unittest
from mock import *
import plpy_mock as plpy

# Model/layer classes used by the tests below; assuming the tf.keras API
# here, consistent with the tests' use of self.subject.tf and
# self.subject.tf.keras.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten
| |
try:
    from pgsanity.pgsanity import check_string as pglint
    pglint("SELECT 1;")
except Exception:
    # Fall back to a no-op pglint if "pip install pgsanity" hasn't been run
    # or isn't functioning (e.g. ecpg isn't in the path)
    def pglint(q, s=False):
        return (True, '')
| |
| # helper for multiplying array by int |
| def mult(k,arr): |
| return [ k*a for a in arr ] |
| |
| class MadlibKerasFitEvalTransitionTestCase(unittest.TestCase): |
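    # Unit tests for the madlib_keras fit/eval transition, merge and final
    # aggregate functions, driven buffer-by-buffer the way plpy would call
    # them on each segment.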
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras |
| self.subject = madlib_keras |
| |
| self.model = Sequential() |
| self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', |
| input_shape=(1,1,1,), padding='same')) |
| self.model.add(Flatten()) |
| |
| self.compile_params = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']" |
| self.fit_params = "batch_size=1, epochs=1" |
| self.model_weights = [3,4,5,6] |
| self.serialized_weights = np.array(self.model_weights, dtype=np.float32 |
| ).tostring() |
| self.loss = 0.5947071313858032 |
| self.accuracy = 1.0 |
| self.dist_key_mapping = [0,1,2] |
| self.accessible_gpus_for_seg = [0] |
| |
| self.independent_var_real = [[[[0.5]]]] * 10 |
| self.dependent_var_int = [[0,1]] * 10 |
| |
| # Params as bytea |
| self.independent_var = np.array(self.independent_var_real, dtype=np.float32).tobytes() |
| self.dependent_var = np.array(self.dependent_var_int, dtype=np.int16).tobytes() |
| |
| self.independent_var_shape = [10,1,1,1] |
| self.dependent_var_shape = [10,2] |
| # We test on segment 0, which has 3 buffers filled with 10 identical |
| # images each, or 30 images total |
| self.total_images_per_seg = [3*len(self.dependent_var_int),20,40] |
| |
| self.dummy_prev_weights = 'dummy weights' |
| |
| # Mock calls to tf.keras fit |
| model_class = self.subject.tf.keras.Model |
| model_class.fit = MagicMock() |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| self.subject.K.clear_session() |
| |
| def _test_fit_transition_first_buffer_pass(self, **kwargs): |
| ending_image_count = len(self.dependent_var_int) |
| |
| previous_state = np.array(self.model_weights, dtype=np.float32) |
| |
| new_state = self.subject.fit_transition( |
| None, self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), self.compile_params, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, previous_state.tostring(), **kwargs) |
| |
| image_count = kwargs['GD']['agg_image_count'] |
| self.assertEqual(ending_image_count, image_count) |
| image_count = new_state |
| self.assertEqual(ending_image_count, image_count) |
| |
| def _test_fit_transition_multiple_model_no_cache_first_buffer_pass(self, **kwargs): |
| ending_image_count = len(self.dependent_var_int) |
| |
| new_state = self.subject.fit_transition( |
| None, self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), self.compile_params, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.serialized_weights, |
| True, **kwargs) |
| |
| self.assertEqual(new_state, None, 'returned weights must be NULL for all rows but the last') |
| image_count = kwargs['GD']['agg_image_count'] |
| self.assertEqual(ending_image_count, image_count) |
| |
| def test_fit_transition_multiple_model_cache_first_buffer_pass(self): |
| ending_image_count = len(self.dependent_var_int) |
| |
| k = {'GD': {}} |
| new_state = self.subject.fit_multiple_transition_caching( |
| self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), self.compile_params, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.serialized_weights, True, **k) |
| |
| self.assertEqual(new_state, None, 'returned weights must be NULL for all rows but the last') |
| image_count = k['GD']['agg_image_count'] |
| self.assertEqual(ending_image_count, image_count) |
| self.assertTrue('sess' not in k['GD']) |
| self.assertTrue('segment_model' not in k['GD']) |
| self.assertTrue(k['GD']['x_train']) |
| self.assertTrue(k['GD']['y_train']) |
| |
| def _test_fit_transition_middle_buffer_pass(self, **kwargs): |
| |
| starting_image_count = len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| state = starting_image_count |
| new_state = self.subject.fit_transition( |
| state, self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), None, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.dummy_prev_weights, **kwargs) |
| |
| image_count = new_state |
| self.assertEqual(ending_image_count, image_count) |
| |
| def _test_fit_transition_multiple_model_no_cache_middle_buffer_pass(self, |
| **kwargs): |
| starting_image_count = len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| kwargs['GD']['agg_image_count'] = starting_image_count |
| |
| new_state = self.subject.fit_transition( |
| None, self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), None, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.dummy_prev_weights, True, True, |
| **kwargs) |
| |
| self.assertEqual(new_state, None, 'returned weights must be NULL for all rows but the last') |
| image_count = kwargs['GD']['agg_image_count'] |
| self.assertEqual(ending_image_count, image_count) |
| |
| def test_fit_transition_multiple_model_cache_middle_buffer_pass(self): |
| starting_image_count = len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| x_train = list() |
| y_train = list() |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| |
| k = {'GD': {'x_train': x_train, 'y_train': y_train, |
| 'agg_image_count' : starting_image_count |
| } |
| } |
| |
| new_state = self.subject.fit_multiple_transition_caching( |
| self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
            self.model.to_json(), self.compile_params, self.fit_params, 0,
            self.dist_key_mapping, 0, 4, self.total_images_per_seg,
| self.accessible_gpus_for_seg, self.serialized_weights, True, **k) |
| |
| self.assertEqual(new_state, None, 'returned weights must be NULL for all rows but the last') |
| image_count = k['GD']['agg_image_count'] |
| self.assertEqual(ending_image_count, image_count) |
| self.assertTrue('sess' not in k['GD']) |
| self.assertTrue('segment_model' not in k['GD']) |
| self.assertTrue(k['GD']['x_train']) |
| self.assertTrue(k['GD']['y_train']) |
| |
| def _test_fit_transition_last_buffer_pass(self, **kwargs): |
| starting_image_count = 2*len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| kwargs['GD']['agg_image_count'] = starting_image_count |
| |
| state = starting_image_count |
| previous_state = np.array(self.model_weights, dtype=np.float32) |
| new_state = self.subject.fit_transition( |
| state, self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), None, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, previous_state.tostring(), |
| **kwargs) |
| |
| state = np.fromstring(new_state, dtype=np.float32) |
| image_count = state[0] |
        # Assert that the returned weights are the model weights multiplied
        # by the final image count.
| weights = state[1:] |
| multiplied_weights = mult(self.total_images_per_seg[0], self.model_weights) |
| self.assertTrue((weights == multiplied_weights).all()) |
| self.assertEqual(ending_image_count, image_count) |
| |
| def _test_internal_keras_eval_transition_first_buffer(self, |
| last_iteration = False, |
| **kwargs): |
| ending_image_count = len(self.dependent_var_int) |
| |
| state = [0,0,0] |
| new_state = self.subject.internal_keras_eval_transition( |
            state, self.dependent_var, self.independent_var,
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), |
| self.serialized_weights, self.compile_params, 0, |
| self.dist_key_mapping, 0, 4, |
| self.total_images_per_seg, self.accessible_gpus_for_seg, |
| last_iteration, None, **kwargs) |
| |
| agg_loss, agg_accuracy, image_count = new_state |
| |
| self.assertEqual(ending_image_count, image_count) |
        # agg_loss and agg_accuracy accumulate the per-image loss and
        # accuracy weighted by the image count.
| self.assertAlmostEqual(self.loss * image_count, agg_loss, 4) |
| self.assertAlmostEqual(self.accuracy * image_count, agg_accuracy, 4) |
| |
| def _test_internal_keras_eval_transition_last_buffer(self, |
| last_iteration = False, |
| **kwargs): |
| starting_image_count = 2*len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| state = [self.loss * starting_image_count, |
| self.accuracy * starting_image_count, |
| starting_image_count] |
| |
| new_state = self.subject.internal_keras_eval_transition( |
            state, self.dependent_var, self.independent_var,
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), |
| 'dummy_model_weights', None, 0, |
| self.dist_key_mapping, 0, 4, |
| self.total_images_per_seg, self.accessible_gpus_for_seg, |
| last_iteration, **kwargs) |
| agg_loss, agg_accuracy, image_count = new_state |
| |
| self.assertEqual(ending_image_count, image_count) |
        # The per-image loss and accuracy are unchanged; the aggregates
        # scale with the total image count.
| self.assertAlmostEqual(self.loss * ending_image_count, agg_loss, 4) |
| self.assertAlmostEqual(self.accuracy * ending_image_count, agg_accuracy, 4) |
| |
| def _test_internal_keras_eval_transition_middle_buffer(self, |
| last_iteration = False, |
| **kwargs): |
| starting_image_count = len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| state = [self.loss * starting_image_count, |
| self.accuracy * starting_image_count, starting_image_count] |
| |
| new_state = self.subject.internal_keras_eval_transition( |
            state, self.dependent_var, self.independent_var,
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), |
| 'dummy_model_weights', None, 0, |
| self.dist_key_mapping, 0, 4, |
| self.total_images_per_seg, self.accessible_gpus_for_seg, |
| last_iteration, **kwargs) |
| |
| agg_loss, agg_accuracy, image_count = new_state |
| |
| self.assertEqual(ending_image_count, image_count) |
| self.assertAlmostEqual(self.loss * ending_image_count, agg_loss, 4) |
| self.assertAlmostEqual(self.accuracy * ending_image_count, agg_accuracy, 4) |
| |
| def _test_fit_transition_multiple_model_no_cache_last_buffer_pass(self, |
| **kwargs): |
| starting_image_count = 2*len(self.dependent_var_int) |
| |
| new_state = self.subject.fit_transition( |
| None, self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), None, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.dummy_prev_weights, |
| True, **kwargs) |
| |
| state = np.fromstring(new_state, dtype=np.float32) |
| |
        # The image count should not be included in the final state of
        # fit_multiple.
| self.assertEqual(len(self.model_weights), len(state)) |
| |
| def test_fit_transition_multiple_model_cache_last_buffer_pass(self): |
| starting_image_count = 2*len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| x_train = list() |
| y_train = list() |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| |
| k = {'GD': {'x_train': x_train, 'y_train': y_train}} |
| |
| state = starting_image_count |
| graph1 = self.subject.tf.get_default_graph() |
| |
| k['GD']['agg_image_count'] = starting_image_count |
| |
| new_state = self.subject.fit_multiple_transition_caching( |
| self.dependent_var, self.independent_var, |
| self.dependent_var_shape, self.independent_var_shape, |
| self.model.to_json(), self.compile_params, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.serialized_weights, False, **k) |
| graph2 = self.subject.tf.get_default_graph() |
| self.assertNotEquals(graph1, graph2) |
| state = np.fromstring(new_state, dtype=np.float32) |
| |
        # The image count should not be included in the final state of
        # fit_multiple.
| self.assertEqual(len(self.model_weights), len(state)) |
| |
| self.assertTrue('sess' not in k['GD']) |
| self.assertTrue('segment_model' not in k['GD']) |
| self.assertTrue(k['GD']['x_train']) |
| self.assertTrue(k['GD']['y_train']) |
| |
| # TODO: test is_final_training_call = True |
| |
| def test_fit_transition_multiple_model_cache_filled_pass(self): |
| starting_image_count = 2*len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| x_train = list() |
| y_train = list() |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| |
| self.subject.compile_and_set_weights(self.model, self.compile_params, |
| '/cpu:0', self.serialized_weights) |
| s1 = self.subject.K.get_session() |
| k = {'GD': {'x_train': x_train, 'y_train': y_train, |
| 'sess': s1, 'segment_model': self.model}} |
| graph1 = self.subject.tf.get_default_graph() |
| new_state = self.subject.fit_multiple_transition_caching( |
| None, None, |
| None, None, |
| self.model.to_json(), self.compile_params, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.serialized_weights, False, **k) |
| graph2 = self.subject.tf.get_default_graph() |
| self.assertNotEquals(graph1, graph2) |
| weights = np.fromstring(new_state, dtype=np.float32) |
| |
        # The image count should not be included in the final state of
        # fit_multiple.
| self.assertEqual(len(self.model_weights), len(weights)) |
| |
| self.assertTrue('sess' not in k['GD']) |
| self.assertTrue('segment_model' not in k['GD']) |
| self.assertTrue(k['GD']['x_train']) |
| self.assertTrue(k['GD']['y_train']) |
| |
| def test_fit_transition_multiple_model_cache_filled_final_training_pass(self): |
| starting_image_count = 2*len(self.dependent_var_int) |
| ending_image_count = starting_image_count + len(self.dependent_var_int) |
| |
| x_train = list() |
| y_train = list() |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| x_train.append(self.subject.np_array_float32(self.independent_var, self.independent_var_shape)) |
| y_train.append(self.subject.np_array_int16(self.dependent_var, self.dependent_var_shape)) |
| |
| k = {'GD': {'x_train': x_train, 'y_train': y_train }} |
| graph1 = self.subject.tf.get_default_graph() |
| new_state = self.subject.fit_multiple_transition_caching( |
| None, None, |
| None, None, |
| self.model.to_json(), self.compile_params, self.fit_params, 0, |
| self.dist_key_mapping, 0, 4, self.total_images_per_seg, |
| self.accessible_gpus_for_seg, self.serialized_weights, True, **k) |
| graph2 = self.subject.tf.get_default_graph() |
| self.assertNotEquals(graph1, graph2) |
| |
| weights = np.fromstring(new_state, dtype=np.float32) |
| |
        # The image count should not be included in the final state of
        # fit_multiple.
| self.assertEqual(len(self.model_weights), len(weights)) |
| |
| self.assertTrue('sess' not in k['GD']) |
| self.assertTrue('segment_model' not in k['GD']) |
| self.assertTrue('x_train' not in k['GD']) |
| self.assertTrue('y_train' not in k['GD']) |
| |
| ############### GRAPH AND SESSION TESTS ################################ |
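    # The tests below verify the TF session/graph lifecycle: fit and eval
    # share a single session and graph across iterations until the final
    # eval call, which closes the session and clears GD, while fit_multiple
    # tears down its session and graph at the end of every iteration.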
| def test_fit_eval_2_iterations_mcf_null_gpdb(self): |
| kwargs = {'GD': {}} |
| GD = kwargs['GD'] |
| |
| ######################### fit for 2 iterations ########## |
| # iteration 1 |
| first_iter_keras_sess = self._run_fit_iteration(**kwargs) |
| self._assert_keras_session_same_as_gd_session(GD) |
| |
| first_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| # iteration 2 (last iteration) |
| last_iter_keras_sess = self._run_fit_iteration(**kwargs) |
| self._assert_keras_session_same_as_gd_session(GD) |
| |
| last_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| self.assertEquals(first_iter_keras_sess, last_iter_keras_sess) |
| self.assertEquals(first_iter_tf_graph, last_iter_tf_graph) |
| |
| ###################### eval transition for last iteration ########### |
| self._run_eval_iteration(True, last_iter_keras_sess, last_iter_tf_graph, **kwargs) |
| eval_last_iter_keras_sess = self.subject.K.get_session() |
| eval_last_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| self.assertNotEquals(eval_last_iter_keras_sess, last_iter_keras_sess) |
| self.assertNotEquals(eval_last_iter_tf_graph, last_iter_tf_graph) |
| self._assert_gd_cleared(GD) |
| |
| def test_fit_eval_2_iterations_mcf_1_gpdb(self): |
| kwargs = {'GD': {}} |
| GD = kwargs['GD'] |
| |
| ######################### fit + eval for 2 iterations ########## |
| # iteration 1 fit |
| first_iter_keras_sess = self._run_fit_iteration(**kwargs) |
| self._assert_keras_session_same_as_gd_session(GD) |
| |
| first_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| # iteration 1 eval |
| self._run_eval_iteration(False, first_iter_keras_sess, first_iter_tf_graph, **kwargs) |
| self._assert_keras_session_same_as_gd_session(GD) |
| |
| eval_first_iter_keras_sess = self.subject.K.get_session() |
| eval_first_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| self.assertEquals(eval_first_iter_keras_sess, first_iter_keras_sess) |
| self.assertEquals(eval_first_iter_tf_graph, first_iter_tf_graph) |
| |
| # iteration 2 fit (last iteration) |
| last_iter_keras_sess = self._run_fit_iteration(**kwargs) |
| self._assert_keras_session_same_as_gd_session(GD) |
| |
| last_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| self.assertEquals(first_iter_keras_sess, last_iter_keras_sess) |
| self.assertEquals(first_iter_tf_graph, last_iter_tf_graph) |
| |
| # iteration 2 eval (last iteration) |
        # eval and fit use the same session & graph for all iterations.
        # After the last call to eval (last iteration), we assert that the
        # session was closed and the graph cleared out. To do so, we call
        # get_session() and get_default_graph(), which now return a new
        # session and a new graph, and assert that they are not equal to the
        # previous iteration's session and graph.
| self._run_eval_iteration(True, last_iter_keras_sess, last_iter_tf_graph, **kwargs) |
| |
| eval_last_iter_keras_sess = self.subject.K.get_session() |
| eval_last_iter_tf_graph = self.subject.tf.get_default_graph() |
| |
| self.assertNotEquals(eval_last_iter_keras_sess, last_iter_keras_sess) |
| self.assertNotEquals(eval_last_iter_tf_graph, last_iter_tf_graph) |
| self._assert_gd_cleared(GD) |
| |
| def test_fit_multiple_2_iterations(self): |
| kwargs = {'GD': {}} |
| GD = kwargs['GD'] |
| |
| ############ fit multiple for 2 iterations ########## |
| # iteration 1 |
        # first_iter_tf_graph is used to assert that calling fit_multiple
        # closes the tf session and graph at the last buffer. It is fetched
        # before fit_transition is called (from fit_multiple) because the
        # session created inside fit_transition reuses first_iter_tf_graph
        # instead of creating a new graph, which enables the not-equals
        # assert below.
| first_iter_tf_graph = self.subject.tf.get_default_graph() |
| first_iter_keras_sess = self._run_fit_multiple_iteration(**kwargs) |
| self._assert_gd_cleared(GD) |
| |
| # iteration 2 (last iteration) |
| last_iter_tf_graph = self.subject.tf.get_default_graph() |
| last_iter_keras_sess = self._run_fit_multiple_iteration(**kwargs) |
| self._assert_gd_cleared(GD) |
| |
| self.assertNotEquals(first_iter_keras_sess, last_iter_keras_sess) |
| self.assertNotEquals(first_iter_tf_graph, last_iter_tf_graph) |
| |
| def test_eval_multiple_any_iteration(self): |
        # This test covers two cases:
        # 1. Calling eval_transition from fit_multiple
        # 2. Calling eval_transition from evaluate directly
| kwargs = {'GD': {}} |
| GD = kwargs['GD'] |
| |
        # eval_iter_tf_graph1 is used to assert that calling eval clears the
        # tf session and graph. It is fetched before eval_transition is
        # called because the session created inside eval_transition reuses
        # eval_iter_tf_graph1 instead of creating a new graph, which enables
        # the not-equals assert below.
| eval_iter_tf_graph1 = self.subject.tf.get_default_graph() |
| eval_iter_keras_sess1 = self._run_eval_iteration(True, None, None, True, **kwargs) |
| eval_iter_keras_sess2 = self.subject.K.get_session() |
| eval_iter_tf_graph2 = self.subject.tf.get_default_graph() |
| |
| self.assertNotEquals(eval_iter_keras_sess1, eval_iter_keras_sess2) |
| self.assertNotEquals(eval_iter_tf_graph1, eval_iter_tf_graph2) |
| self._assert_gd_cleared(GD) |
| |
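    # Runs one full eval pass (first, middle and last buffer) and returns
    # the keras session observed after the first buffer.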
| def _run_eval_iteration(self, final_iteration, prev_keras_sess, prev_tf_graph, called_from_fit_multiple=False, **kwargs): |
| self._test_internal_keras_eval_transition_first_buffer(final_iteration, |
| **kwargs) |
| self._assert_gd_is_valid(kwargs['GD']) |
| self._assert_keras_session_same_as_gd_session(kwargs['GD']) |
| |
| eval_first_buffer_keras_sess = kwargs['GD']['sess'] |
| self.assertFalse(eval_first_buffer_keras_sess._closed) |
| eval_first_buffer_tf_graph = self.subject.tf.get_default_graph() |
| |
| if not called_from_fit_multiple: |
| self.assertEquals(eval_first_buffer_keras_sess, prev_keras_sess) |
| self.assertEquals(eval_first_buffer_tf_graph, prev_tf_graph) |
| |
| self._test_internal_keras_eval_transition_middle_buffer(final_iteration, |
| **kwargs ) |
| self._assert_gd_is_valid(kwargs['GD']) |
| self._assert_keras_session_same_as_gd_session(kwargs['GD']) |
| self.assertFalse(eval_first_buffer_keras_sess._closed) |
| |
| self._test_internal_keras_eval_transition_last_buffer(final_iteration, |
| **kwargs) |
| if final_iteration: |
| self._assert_gd_cleared(kwargs['GD']) |
| self.assertTrue(eval_first_buffer_keras_sess._closed) |
| else: |
| self._assert_gd_is_valid(kwargs['GD']) |
| self.assertFalse(eval_first_buffer_keras_sess._closed) |
| return eval_first_buffer_keras_sess |
| |
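    # Runs one fit iteration (first, middle and last buffer) and returns the
    # keras session, which is expected to stay open across all three buffers.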
| def _run_fit_iteration(self, **kwargs): |
| self._test_fit_transition_first_buffer_pass(**kwargs) |
| gd_first_buffer = kwargs['GD'] |
| self._assert_gd_is_valid(gd_first_buffer) |
| iter_sess = gd_first_buffer['sess'] |
| self.assertFalse(iter_sess._closed) |
| self._assert_keras_session_same_as_gd_session(gd_first_buffer) |
| |
| self._test_fit_transition_middle_buffer_pass(**kwargs) |
| gd_middle_buffer = kwargs['GD'] |
| self._assert_gd_is_valid(gd_middle_buffer) |
| self.assertFalse(iter_sess._closed) |
| |
| self._test_fit_transition_last_buffer_pass(**kwargs) |
| gd_last_buffer = kwargs['GD'] |
| self._assert_gd_is_valid(gd_last_buffer) |
| self.assertFalse(iter_sess._closed) |
| return iter_sess |
| |
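    # Runs one fit_multiple iteration; unlike fit, the session is expected
    # to be closed and GD cleared after the last buffer.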
| def _run_fit_multiple_iteration(self, **kwargs): |
| self._test_fit_transition_multiple_model_no_cache_first_buffer_pass(**kwargs) |
| self._assert_gd_is_valid(kwargs['GD']) |
| self._assert_keras_session_same_as_gd_session(kwargs['GD']) |
| iter_sess = kwargs['GD']['sess'] |
| self.assertFalse(iter_sess._closed) |
| |
| self._test_fit_transition_multiple_model_no_cache_middle_buffer_pass(**kwargs) |
| self._assert_gd_is_valid(kwargs['GD']) |
| self._assert_keras_session_same_as_gd_session(kwargs['GD']) |
| self.assertFalse(iter_sess._closed) |
| |
| self._test_fit_transition_multiple_model_no_cache_last_buffer_pass(**kwargs) |
| self._assert_gd_cleared(kwargs['GD']) |
| self.assertTrue(iter_sess._closed) |
| return iter_sess |
| |
    def _init_GD(self, gd, starting_image_count=0):
        self.subject.compile_and_set_weights(self.model, self.compile_params,
                                             '/cpu:0', self.serialized_weights)
        # Update the caller's dict in place so the change is visible outside
        # this helper.
        gd.update({'segment_model': self.model,
                   'sess': Mock(),
                   'agg_image_count': starting_image_count})
| |
| def _assert_keras_session_same_as_gd_session(self, gd): |
| sess = self.subject.K.get_session() |
| self.assertEquals(sess, gd['sess']) |
| |
| def _assert_gd_cleared(self, gd): |
| self.assertEquals(0, len(gd.keys())) |
| |
| def _assert_gd_is_valid(self, gd): |
| self.assertTrue(gd['sess']) |
| self.assertTrue(gd['segment_model']) |
| |
| ################################################################ |
| |
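    # The serialized fit state is [image_count, weights...]. Based on the
    # assertions below, fit_merge sums both the image counts and the weight
    # vectors of its two input states.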
| def test_fit_merge(self): |
| image_count = self.total_images_per_seg[0] |
| state1 = [image_count] |
| state1.extend(mult(3,self.model_weights)) |
| state1 = np.array(state1, dtype=np.float32) |
| state2 = [image_count+30] |
| state2.extend(mult(2,self.model_weights)) |
| state2 = np.array(state2, dtype=np.float32) |
| merged_state = self.subject.fit_merge(state1.tostring(),state2.tostring()) |
| state = np.fromstring(merged_state, dtype=np.float32) |
| image_count_total = state[0] |
| weights = np.rint(state[1:]).astype(np.int) |
| |
| self.assertEqual( 2*image_count+30 , image_count_total ) |
| self.assertTrue( (mult(5,self.model_weights) == weights).all()) |
| |
| def test_fit_merge_none_first(self): |
| image_count = self.total_images_per_seg[0] |
| input_state = [image_count] |
| input_state.extend(self.model_weights) |
| input_state = np.array(input_state, dtype=np.float32) |
| merged_state = self.subject.fit_merge(None, input_state.tostring()) |
| state = np.fromstring(merged_state, dtype=np.float32) |
| image_count_total = state[0] |
| weights = np.rint(state[1:]).astype(np.int) |
| |
| self.assertEqual(image_count, image_count_total) |
| self.assertTrue((self.model_weights == weights).all()) |
| |
| def test_fit_merge_none_second(self): |
| image_count = self.total_images_per_seg[0] |
| input_state = [image_count] |
| input_state.extend(self.model_weights) |
| input_state = np.array(input_state, dtype=np.float32) |
| merged_state = self.subject.fit_merge(input_state.tostring(), None) |
| state = np.fromstring(merged_state, dtype=np.float32) |
| image_count_total = state[0] |
| weights = np.rint(state[1:]).astype(np.int) |
| |
| self.assertEqual(image_count, image_count_total) |
| self.assertTrue((self.model_weights == weights).all()) |
| |
| def test_fit_merge_both_none(self): |
| result = self.subject.fit_merge(None,None) |
| self.assertEqual(None, result) |
| |
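    # Based on the assertion below, fit_final divides the accumulated
    # (count-weighted) weights by the total image count and drops the count
    # from the returned state, leaving just the averaged weights.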
| def test_fit_final(self): |
| image_count = self.total_images_per_seg[0] |
| input_state = [image_count] |
| input_state.extend(mult(image_count,self.model_weights)) |
| input_state = np.array(input_state, dtype=np.float32) |
| |
| output_state = self.subject.fit_final(input_state.tostring()) |
| output_state = np.fromstring(output_state, dtype=np.float32) |
| weights = np.rint(output_state).astype(np.int) |
| |
| self.assertTrue((self.model_weights == weights).all()) |
| |
| def test_fit_final_none(self): |
| result = self.subject.fit_final(None) |
| self.assertEqual(result, None) |
| |
| def test_fit_final_image_count_zero(self): |
| input_state = [0] |
| input_state.extend(self.model_weights) |
| input_state = np.array(input_state, dtype=np.float32) |
| |
| with self.assertRaises(plpy.PLPYException): |
| result = self.subject.fit_final(input_state.tostring()) |
| |
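    # should_compute_metrics_this_iter(curr_iter, frequency, total_iters)
    # appears to return True on the last iteration and whenever curr_iter is
    # a multiple of frequency (behavior inferred from the cases below).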
    def test_should_compute_metrics_this_iter_last_iter_case1(self):
        res = self.subject.should_compute_metrics_this_iter(5, 5, 5)
        self.assertEqual(True, res)

    def test_should_compute_metrics_this_iter_last_iter_case2(self):
        res = self.subject.should_compute_metrics_this_iter(5, 3, 5)
        self.assertEqual(True, res)

    def test_should_compute_metrics_this_iter_before_interval(self):
        res = self.subject.should_compute_metrics_this_iter(1, 3, 5)
        self.assertEqual(False, res)

    def test_should_compute_metrics_this_iter_after_interval(self):
        res = self.subject.should_compute_metrics_this_iter(4, 3, 5)
        self.assertEqual(False, res)

    def test_should_compute_metrics_this_iter_at_interval(self):
        res = self.subject.should_compute_metrics_this_iter(3, 3, 5)
        self.assertEqual(True, res)

    def test_should_compute_metrics_this_iter_every_iter(self):
        res = self.subject.should_compute_metrics_this_iter(2, 1, 5)
        self.assertEqual(True, res)
| |
| class InternalKerasPredictTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras_predict |
| self.subject = madlib_keras_predict |
| |
| self.model = Sequential() |
| self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', |
| input_shape=(1,1,1,), padding='same')) |
| self.model.add(Flatten()) |
| |
| self.all_seg_ids = [0,1,2] |
| |
| self.independent_var = [[[240]]] |
| self.total_images_per_seg = [3,3,4] |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
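    # internal_keras_predict caches the model in SD across rows: row_count
    # is incremented on every row, and SD is cleared once the segment's last
    # image has been processed (or on error).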
| def test_predict_first_image_pass_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| model_weights = [1, 2, 3, 4] |
| serialized_weights = np.array(model_weights, dtype=np.float32).tostring() |
| |
| k = {'SD': {}} |
| result = self.subject.internal_keras_predict( |
| self.independent_var, self.model.to_json(), |
| serialized_weights, 255, 0, self.all_seg_ids, |
| self.total_images_per_seg, 0, 4, **k) |
| self.assertEqual(2, len(result)) |
| self.assertEqual(1, k['SD']['row_count']) |
| self.assertEqual(True, 'segment_model_predict' in k['SD']) |
| |
| def test_predict_middle_image_pass_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| |
| k = {'SD': { 'row_count': 1}} |
| k['SD']['segment_model_predict'] = self.model |
| result = self.subject.internal_keras_predict( |
| self.independent_var, None, None, 255, 0, |
| self.all_seg_ids, self.total_images_per_seg, 0, 4, **k) |
| self.assertEqual(2, len(result)) |
| self.assertEqual(2, k['SD']['row_count']) |
| self.assertEqual(True, 'segment_model_predict' in k['SD']) |
| |
| def test_predict_last_image_pass_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| self.model.add(Dense(3)) |
| |
| k = {'SD': { 'row_count': 2}} |
| k['SD']['segment_model_predict'] = self.model |
| result = self.subject.internal_keras_predict( |
| self.independent_var, None, None, 255, 0, |
| self.all_seg_ids, self.total_images_per_seg, 0, 4, **k) |
| |
        # We expect len(result) to be 3 because the final Dense layer in the
        # architecture has 3 units.
| self.assertEqual(3, len(result)) |
| self.assertEqual(False, 'row_count' in k['SD']) |
| self.assertEqual(False, 'segment_model_predict' in k['SD']) |
| |
| def test_predict_error_should_clear_sd(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| model_weights = [1, 2, 3, 4] |
| serialized_weights = np.array(model_weights, dtype=np.float32).tostring() |
| |
| # inject error by passing current_seg_id as -1 |
| current_seg_id = -1 |
| k = {'SD':{}} |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.internal_keras_predict( |
| self.independent_var, self.model.to_json(), serialized_weights, |
| 255, current_seg_id, self.all_seg_ids, |
| self.total_images_per_seg, 0, 4, **k) |
| self.assertEqual("ValueError('-1 is not in list',)", str(error.exception)) |
| self.assertEqual(False, 'row_count' in k['SD']) |
| self.assertEqual(False, 'segment_model_predict' in k['SD']) |
| |
| |
| class MadlibKerasPredictBYOMTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| self.num_classes = 5 |
| self.model = Sequential() |
| self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', |
| input_shape=(1,1,1,), padding='same')) |
| self.model.add(Dense(self.num_classes)) |
| |
| self.pred_type = 'prob' |
| self.use_gpus = False |
| self.class_values = ['foo', 'bar', 'baaz', 'foo2', 'bar2'] |
| self.normalizing_const = 255.0 |
| |
| import madlib_keras_predict |
| self.module = madlib_keras_predict |
| self.module.get_model_arch_weights = Mock(return_value=( |
| self.model.to_json(), 'weights')) |
| self.module.InputValidator.validate_predict_byom_tables = Mock() |
| self.module.InputValidator.validate_input_shape = Mock() |
| self.module.BasePredict.call_internal_keras = Mock() |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
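    # When the optional args are NULL, PredictBYOM appears to default
    # pred_type to 'prob', class_values to range(num_classes), and
    # normalizing_const to 1.0; gpus_per_host comes from
    # get_accessible_gpus_for_seg when use_gpus is True (inferred from the
    # assertions below).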
| def test_predictbyom_defaults_1(self): |
| self.module.get_accessible_gpus_for_seg = Mock(return_value = [2,2,2]) |
| res = self.module.PredictBYOM('schema_madlib', 'model_arch_table', |
| 'model_id', 'test_table', 'id_col', |
| 'independent_varname', 'output_table', None, |
| True, None, None) |
| self.assertEqual('prob', res.pred_type) |
| self.assertEqual(2, res.gpus_per_host) |
| self.assertEqual([0,1,2,3,4], res.class_values) |
| self.assertEqual(1.0, res.normalizing_const) |
| |
| def test_predictbyom_defaults_2(self): |
| res = self.module.PredictBYOM('schema_madlib', 'model_arch_table', |
| 'model_id', 'test_table', 'id_col', |
| 'independent_varname', 'output_table', |
| self.pred_type, self.use_gpus, |
| self.class_values, self.normalizing_const) |
| self.assertEqual('prob', res.pred_type) |
| self.assertEqual(0, res.gpus_per_host) |
| self.assertEqual(['foo', 'bar', 'baaz', 'foo2', 'bar2'], res.class_values) |
| self.assertEqual(255.0, res.normalizing_const) |
| |
| def test_predictbyom_exception_invalid_params(self): |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.module.PredictBYOM('schema_madlib', 'model_arch_table', |
| 'model_id', 'test_table', 'id_col', |
| 'independent_varname', 'output_table', |
| 'invalid_pred_type', self.use_gpus, |
| self.class_values, self.normalizing_const) |
| self.assertIn('invalid_pred_type', str(error.exception)) |
| |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.module.PredictBYOM('schema_madlib', 'model_arch_table', |
| 'model_id', 'test_table', 'id_col', |
| 'independent_varname', 'output_table', |
| self.pred_type, self.use_gpus, |
| ["foo", "bar", "baaz"], self.normalizing_const) |
| self.assertIn('class values', str(error.exception).lower()) |
| |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.module.PredictBYOM('schema_madlib', 'model_arch_table', |
| 'model_id', 'test_table', 'id_col', |
| 'independent_varname', 'output_table', |
| self.pred_type, self.use_gpus, |
| self.class_values, 0) |
| self.assertIn('normalizing const', str(error.exception).lower()) |
| |
| |
| class MadlibKerasWrapperTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| |
| import madlib_keras_wrapper |
| self.subject = madlib_keras_wrapper |
| |
| self.use_gpus = False |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
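    # Judging by the expected values below, the GPU memory fraction is
    # 0.9 * gpus_per_host / segments_per_host, capped at 0.9.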
| def test_get_gpu_memory_fraction(self): |
| |
| gpus_per_host = 4 |
| segments_per_host = 4 |
| result = self.subject.get_gpu_memory_fraction(gpus_per_host, segments_per_host) |
| self.assertEqual(result, 0.9) |
| |
| gpus_per_host = 10 |
| segments_per_host = 4 |
| result = self.subject.get_gpu_memory_fraction(gpus_per_host, segments_per_host) |
| self.assertEqual(result, 0.9) |
| |
| gpus_per_host = 2 |
| segments_per_host = 6 |
| result = self.subject.get_gpu_memory_fraction(gpus_per_host, segments_per_host) |
| self.assertEqual(result, 0.3) |
| |
| gpus_per_host = 1 |
| segments_per_host = 4 |
| result = self.subject.get_gpu_memory_fraction(gpus_per_host, segments_per_host) |
| self.assertEqual(result, 0.225) |
| |
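    # On Postgres, all GPUs are exposed via CUDA_VISIBLE_DEVICES; on
    # Greenplum, each segment is pinned to one GPU (seg_id modulo
    # gpus_per_host, inferred from the expected values) and the device name
    # is '/gpu:0' within that masked view. gpus_per_host=0 selects the CPU.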
| def test_get_device_name_and_set_cuda_env_postgres(self): |
| self.subject.is_platform_pg = Mock(return_value = True) |
| |
| seg_id = -1 |
| gpus_per_host = 3 |
| |
| self.assertEqual('/gpu:0', self.subject.get_device_name_and_set_cuda_env( |
| gpus_per_host, seg_id )) |
| self.assertEqual('0,1,2', os.environ['CUDA_VISIBLE_DEVICES']) |
| |
| gpus_per_host = 0 |
| self.assertEqual('/cpu:0', self.subject.get_device_name_and_set_cuda_env( |
| gpus_per_host, seg_id )) |
| self.assertEqual('-1', os.environ['CUDA_VISIBLE_DEVICES']) |
| |
| def test_get_device_name_and_set_cuda_env_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| |
| seg_id=3 |
| gpus_per_host=2 |
| self.assertEqual('/gpu:0', self.subject.get_device_name_and_set_cuda_env( |
| gpus_per_host, seg_id)) |
| self.assertEqual('1', os.environ['CUDA_VISIBLE_DEVICES']) |
| |
| gpus_per_host=0 |
| self.assertEqual('/cpu:0', self.subject.get_device_name_and_set_cuda_env( |
| gpus_per_host, seg_id)) |
| self.assertEqual('-1', os.environ['CUDA_VISIBLE_DEVICES']) |
| |
| |
| def test_split_and_strip(self): |
| self.assertEqual(('a','b'), self.subject.split_and_strip(' a = b ')) |
| |
| def test_parse_optimizer(self): |
| opt_name = 'SGD' |
| final_args = {'lr':0.01, 'decay':1e-6, 'nesterov':True} |
| compile_dict = {} |
| compile_dict['optimizer']='SGD(lr=0.01, decay=1e-6, nesterov=True)' |
| result_name, result_params = self.subject.parse_optimizer(compile_dict) |
| |
| self.assertEqual(result_name, opt_name) |
| self.assertDictEqual(result_params, final_args) |
| |
| def test_validate_and_literal_eval_keys(self): |
| test_dict = {'batch_size':'2', 'epochs':'1', 'verbose':'0'} |
| target_dict = {'batch_size':2, 'epochs':1, 'verbose':0} |
| literal_eval_fit_params = ['batch_size','epochs','verbose','shuffle', |
| 'class_weight','initial_epoch','steps_per_epoch'] |
| accepted_fit_params = literal_eval_fit_params |
| result_params = self.subject.validate_and_literal_eval_keys( |
| test_dict, |
| literal_eval_fit_params, |
| accepted_fit_params) |
| self.assertDictEqual(result_params, target_dict) |
| |
| def test_parse_and_validate_fit_params(self): |
| test_str = "batch_size=2, epochs=1, verbose=0" |
| fit_dict = {'batch_size':2, 'epochs':1, 'verbose':0} |
| result_params = self.subject.parse_and_validate_fit_params(test_str) |
| self.assertDictEqual(result_params, fit_dict) |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, shuffle = 'batch'" |
| fit_dict = {'batch_size':2, 'epochs':1, 'verbose':0, 'shuffle':'batch'} |
| result_params = self.subject.parse_and_validate_fit_params(test_str) |
| self.assertDictEqual(result_params, fit_dict) |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, shuffle = True" |
| fit_dict = {'batch_size':2, 'epochs':1, 'verbose':0, 'shuffle':True} |
| result_params = self.subject.parse_and_validate_fit_params(test_str) |
| self.assertDictEqual(result_params, fit_dict) |
| |
| test_str = "" |
| fit_dict = {} |
| result_params = self.subject.parse_and_validate_fit_params(test_str) |
| self.assertDictEqual(result_params, fit_dict) |
| |
| def test_parse_and_validate_fit_params_duplicate_params(self): |
| test_str = "batch_size=1, batch_size=10, verbose=0" |
| fit_dict = {'batch_size':10, 'verbose':0} |
| result_params = self.subject.parse_and_validate_fit_params(test_str) |
| self.assertDictEqual(result_params, fit_dict) |
| |
| def test_parse_and_validate_fit_params_invalid_fit_param_fail(self): |
| test_str = "does_not_exist=2, epochs=1, verbose=0" |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| self.assertIn('not currently accepted', str(error.exception)) |
| |
| test_str = "batch_size=not_lit_eval(1), epochs=1, verbose=0" |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| self.assertIn('invalid input value', str(error.exception)) |
| |
| def test_parse_and_validate_compile_params(self): |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), " \ |
| "loss='categorical_crossentropy', metrics=['accuracy']" |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':['accuracy'], 'loss':'categorical_crossentropy'} |
| opt_name,opt_args,result_params = self.subject.parse_and_validate_compile_params(test_str) |
| self.assertDictEqual(result_params, compile_dict) |
| |
| # Test without the metrics |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), " \ |
| "loss='categorical_crossentropy'" |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'loss':'categorical_crossentropy'} |
| opt_name,opt_args,result_params = self.subject.parse_and_validate_compile_params(test_str) |
| self.assertDictEqual(result_params, compile_dict) |
| |
| def test_validate_metrics_None_pass(self): |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':['accuracy'], 'loss':'categorical_crossentropy'} |
| self.subject._validate_metrics(compile_dict) |
| |
| def test_validate_metrics_empty_pass(self): |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':[], 'loss':'categorical_crossentropy'} |
| self.subject._validate_metrics(compile_dict) |
| |
| def test_validate_metrics_two_params_fail(self): |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':['accuracy','mae'], 'loss':'categorical_crossentropy'} |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject._validate_metrics(compile_dict) |
| |
| def test_validate_metrics_one_unsupported_fail(self): |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':['sparse_categorical_accuracy'], 'loss':'categorical_crossentropy'} |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject._validate_metrics(compile_dict) |
| |
| def test_validate_metrics_two_unsupported_fail(self): |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':['sparse_categorical_accuracy', 'sparse_categorical_crossentropy'], 'loss':'categorical_crossentropy'} |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject._validate_metrics(compile_dict) |
| |
| def test_validate_metrics_one_supported_one_unsupported_fail(self): |
| compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', |
| 'metrics':['accuracy', 'sparse_categorical_crossentropy'], 'loss':'categorical_crossentropy'} |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject._validate_metrics(compile_dict) |
| |
| def test_parse_and_validate_compile_params_default_optimizer_pass(self): |
| test_str = "optimizer='SGD', loss='categorical_crossentropy'" |
| _,_,result_dict = self.subject.parse_and_validate_compile_params(test_str) |
| self.assertEqual('SGD', result_dict['optimizer']) |
| |
| test_str = "optimizer='sgd()', loss='categorical_crossentropy'" |
| _,_,result_dict = self.subject.parse_and_validate_compile_params(test_str) |
| self.assertEqual('sgd()', result_dict['optimizer']) |
| |
| def test_parse_and_validate_compile_params_duplicate_loss(self): |
| test_str = "optimizer='SGD', loss='loss1', metrics=['accuracy'], loss='loss2'" |
| _,_,result_dict = self.subject.parse_and_validate_compile_params(test_str) |
| self.assertEqual('loss2', result_dict['loss']) |
| |
| def test_parse_and_validate_compile_params_no_optimizer_fail(self): |
| test_str = "loss='categorical_crossentropy', metrics=['accuracy']" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| def test_parse_and_validate_compile_params_no_loss_fail(self): |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), " \ |
| "metrics=['accuracy']" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| def test_parse_and_validate_compile_params_unsupported_loss_fail(self): |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), " \ |
| "metrics=['accuracy'], loss='sparse_categorical_crossentropy'" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| def test_parse_and_validate_compile_params_dict_metrics_fail(self): |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), " \ |
| "loss='categorical_crossentropy', metrics={'0':'accuracy'}" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| def test_parse_and_validate_compile_params_tensor_loss_weights_fail(self): |
| |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True)," \ |
| " loss='categorical_crossentropy', metrics=['accuracy']," \ |
| " loss_weights = keras.layers.Input(shape=(32,))" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| def test_parse_and_validate_compile_params_list_dict_sample_weight_mode_fail(self): |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True)," \ |
| " loss='categorical_crossentropy', metrics=['accuracy']," \ |
| " sample_weight_mode = [0,1]" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True)," \ |
| " loss='categorical_crossentropy', metrics=['accuracy']," \ |
| " sample_weight_mode = {'0':'1'}" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| |
| def test_parse_and_validate_compile_params_target_tensors_fail(self): |
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True)," \ |
| " loss='categorical_crossentropy', metrics=['accuracy']," \ |
| " target_tensors = keras.layers.Input(shape=(32,))" |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_compile_params(test_str) |
| |
| def test_parse_and_validate_fit_params_callbacks_fail(self): |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, callbacks=keras.callbacks.Callback()" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| def test_parse_and_validate_fit_params_validation_split_fail(self): |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, validation_split=0.1" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| def test_parse_and_validate_fit_params_validation_data_fail(self): |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, validation_data=(1,2,3)" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| def test_parse_and_validate_fit_params_sample_weight_fail(self): |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, sample_weight=np.array([1,2,3])" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| def test_parse_and_validate_fit_params_validation_steps_fail(self): |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, validation_steps=1" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| def test_parse_and_validate_fit_params_validation_freq_fail(self): |
| test_str = "batch_size=2, epochs=1, verbose=0, validation_freq=1" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| test_str = "batch_size=2, epochs=1, verbose=0, validation_freq=[1]" |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.parse_and_validate_fit_params(test_str) |
| |
| def test_parse_and_validate_compile_params_syntax_error_fail(self): |
        # missing closing parenthesis
| test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True" |
| with self.assertRaises(ValueError) as error: |
| self.subject.parse_and_validate_compile_params(test_str) |
| self.assertIn('could not convert string to float', str(error.exception)) |
| |
        # missing opening parenthesis
| test_str = "optimizer=SGDlr=0.01, decay=1e-6, nesterov=True)," \ |
| " loss='categorical_crossentropy'" |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.parse_and_validate_compile_params(test_str) |
| self.assertIn('not currently accepted', str(error.exception)) |
| |
        # missing comma
| test_str = "optimizer=SGD(lr=0.01 decay=1e-6, nesterov=True)," \ |
| " loss='categorical_crossentropy'" |
| with self.assertRaises(ValueError) as error: |
| self.subject.parse_and_validate_compile_params(test_str) |
| self.assertIn('invalid literal for float', str(error.exception)) |
| |
    def test_parse_and_validate_compile_params_invalid_optimizer_fail(self):
| test_str = "optimizer='SGD1', loss='categorical_crossentropy'" |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.parse_and_validate_compile_params(test_str) |
| self.assertIn('invalid optimizer', str(error.exception)) |
| |
| |
| class MadlibKerasFitCommonValidatorTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras_validator |
| self.subject = madlib_keras_validator |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
| |
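    # metrics_compute_frequency appears to be valid when it is None or in
    # [1, num_iterations] (inferred from the four cases below).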
| def test_is_valid_metrics_compute_frequency_True_None(self): |
| self.subject.FitCommonValidator._validate_common_args = Mock() |
| obj = self.subject.FitCommonValidator( |
| 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, |
| 'dep_varname', 'independent_varname', 5, None, False, False, [0], |
| 'module_name', None) |
| self.assertEqual(True, obj._is_valid_metrics_compute_frequency()) |
| |
| def test_is_valid_metrics_compute_frequency_True_num(self): |
| self.subject.FitCommonValidator._validate_common_args = Mock() |
| obj = self.subject.FitCommonValidator( |
| 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, |
| 'dep_varname', 'independent_varname', 5, 3, False, False, [0], |
| 'module_name', None) |
| self.assertEqual(True, obj._is_valid_metrics_compute_frequency()) |
| |
| def test_is_valid_metrics_compute_frequency_False_zero(self): |
| self.subject.FitCommonValidator._validate_common_args = Mock() |
| obj = self.subject.FitCommonValidator( |
| 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, |
| 'dep_varname', 'independent_varname', 5, 0, False, False, [0], |
| 'module_name', None) |
| self.assertEqual(False, obj._is_valid_metrics_compute_frequency()) |
| |
| def test_is_valid_metrics_compute_frequency_False_greater(self): |
| self.subject.FitCommonValidator._validate_common_args = Mock() |
| obj = self.subject.FitCommonValidator( |
| 'test_table', 'val_table', 'model_table', 'model_arch_table', 2, |
| 'dep_varname', 'independent_varname', 5, 6, False, False, [0], |
| 'module_name', None) |
| self.assertEqual(False, obj._is_valid_metrics_compute_frequency()) |
| |
| |
| class InputValidatorTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras_validator |
| self.module = madlib_keras_validator |
| self.subject = self.module.InputValidator |
| |
| self.module_name = 'module' |
| self.test_table = 'test_table' |
| self.model_table = 'model_table' |
| self.id_col = 'id_col' |
| self.ind_var = 'ind_var' |
| self.model_arch_table = 'model_arch_table' |
| self.model_id = 2 |
| self.num_classes = 1598 |
| self.model = Sequential() |
| self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', |
| input_shape=(1,1,1,), padding='same')) |
| self.model.add(Dense(self.num_classes)) |
| self.classes = ['train', 'boat', 'car', 'airplane'] |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
| def test_validate_pred_type_invalid_pred_type(self): |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.validate_pred_type( |
| self.module_name, 'invalid_pred_type', ['cat', 'dog']) |
| self.assertIn('type', str(error.exception).lower()) |
| |
| def test_validate_class_values_greater_than_1600_class_values(self): |
| self.model.add(Dense(1599)) |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.validate_class_values( |
| self.module_name, range(1599), 'prob', self.model.to_json()) |
| self.assertIn('1600', str(error.exception)) |
| |
| def test_validate_class_values_valid_class_values_prob(self): |
| self.subject.validate_class_values( |
| self.module_name, range(self.num_classes), 'prob', self.model.to_json()) |
| self.subject.validate_class_values( |
| self.module_name, None, 'prob', self.model.to_json()) |
| |
| def test_validate_class_values_valid_pred_type_valid_class_values_response(self): |
| self.subject.validate_class_values( |
| self.module_name, range(self.num_classes), 'response', self.model.to_json()) |
| self.subject.validate_class_values( |
| self.module_name, None, 'response', self.model.to_json()) |
| |
| def test_validate_input_shape_shapes_do_not_match(self): |
| # minibatched data |
| self.plpy_mock_execute.return_value = [{'shape': [1,3,32,32]}] |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.validate_input_shape( |
| self.test_table, self.ind_var, [32,32,3], 2, True) |
| # non-minibatched data |
| self.plpy_mock_execute.return_value = [{'n_0': 1,'n_1': 32,'n_2': 32,'n_3': 3}] |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.validate_input_shape( |
| self.test_table, self.ind_var, [32,32,3], 1) |
| self.plpy_mock_execute.return_value = [{'n_0': 1,'n_1': 3}] |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.validate_input_shape( |
| self.test_table, self.ind_var, [3,32], 1) |
| |
| def test_validate_input_shape_shapes_match(self): |
| # minibatched data |
| self.plpy_mock_execute.return_value = [{'shape': [1,32,32,3]}] |
| self.subject.validate_input_shape( |
| self.test_table, self.ind_var, [32,32,3], 2, True) |
| # non-minibatched data |
| self.plpy_mock_execute.return_value = [{'n_0': 32,'n_1': 32,'n_2': 3}] |
| self.subject.validate_input_shape( |
| self.test_table, self.ind_var, [32,32,3], 1) |
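| # Note on the two layouts exercised above: minibatched tables report a |
| # packed 'shape' column (buffer size first), while non-minibatched |
| # tables report one n_<i> column per dimension. The numeric argument |
| # appears to be the 1-based index of the first dimension to compare, |
| # so 2 skips the minibatch buffer dimension. |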
| |
| def test_validate_model_arch_table_none_values(self): |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.validate_model_arch_table( |
| self.module_name, None, self.model_id) |
| self.assertIn('null', str(error.exception).lower()) |
| |
| self.module.input_tbl_valid = Mock() |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.validate_model_arch_table( |
| self.module_name, self.model_arch_table, None) |
| self.assertIn('id', str(error.exception).lower()) |
| |
| def test_validate_gpu_config_with_gpu_all_segments(self): |
| self.plpy_mock_execute.return_value = [{'__internal_gpu_config__': 'all_segments'}] |
| self.subject._validate_gpu_config(self.module_name, 'foo', [1]) |
| |
| def test_validate_gpu_config_no_gpu_all_segments(self): |
| self.plpy_mock_execute.return_value = [{'__internal_gpu_config__': 'all_segments'}] |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject._validate_gpu_config(self.module_name, 'foo', [0]) |
| self.assertIn('missing gpus', str(error.exception).lower()) |
| |
| def test_validate_gpu_config_with_gpu_valid_seg_list(self): |
| self.plpy_mock_execute.return_value = [{'__internal_gpu_config__': [0,1]}] |
| self.subject._validate_gpu_config(self.module_name, 'foo', [1,1,0,1]) |
| |
| def test_validate_gpu_config_with_gpu_invalid_seg_list(self): |
| self.plpy_mock_execute.return_value = [{'__internal_gpu_config__': [0,1]}] |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject._validate_gpu_config(self.module_name, 'foo', [1,0,0,1]) |
| self.assertIn('does not have gpu', str(error.exception).lower()) |
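| |
| # The four tests above pin down the contract: '__internal_gpu_config__' |
| # is either 'all_segments' (every segment in the accessibility list must |
| # have at least one GPU) or a list of segment ids, each of which must |
| # have one. |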
| |
| |
| class MadlibSerializerTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras_serializer |
| self.subject = madlib_keras_serializer |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
| def test_deserialize_image_1d_weights_null_state_returns_none(self): |
| self.assertEqual(None, self.subject.deserialize_as_image_1d_weights(None)) |
| |
| def test_deserialize_image_1d_weights_returns_not_none(self): |
| dummy_state = np.array([2,3,4,5,6], dtype=np.float32) |
| res = self.subject.deserialize_as_image_1d_weights(dummy_state.tostring()) |
| self.assertEqual(2, res[0]) |
| self.assertEqual([3,4,5,6], res[1].tolist()) |
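| # i.e. the first float of the buffer is the image count and the |
| # remaining floats are the 1-d weights |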
| |
| def test_deserialize_nd_weights_null_input_returns_none(self): |
| dummy_state = np.array([0,1,2,3,4,5,6], dtype=np.float32) |
| self.assertEqual(None, self.subject.deserialize_as_nd_weights(dummy_state.tostring(), None)) |
| self.assertEqual(None, self.subject.deserialize_as_nd_weights(None, [1, 2, 3])) |
| self.assertEqual(None, self.subject.deserialize_as_nd_weights(None, None)) |
| |
| def test_deserialize_nd_weights_valid_input_returns_not_none(self): |
| dummy_model_weights = np.array([3,4,5], dtype=np.float32) |
| dummy_model_shape = [(2, 1, 1, 1), (1,)] |
| res = self.subject.deserialize_as_nd_weights(dummy_model_weights.tostring(), |
| dummy_model_shape) |
| self.assertEqual([[[[3.0]]], [[[4.0]]]], res[0].tolist()) |
| self.assertEqual([5], res[1].tolist()) |
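| # i.e. the flat float32 buffer [3,4,5] is consumed left to right: the |
| # first two values fill shape (2,1,1,1) and the last fills (1,) |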
| |
| def test_deserialize_nd_weights_invalid_input_fails(self): |
| # pass an invalid state with missing model weights |
| invalid_model_weights = np.array([1,2], dtype=np.float32) |
| dummy_model_shape = [(2, 1, 1, 1), (1,)] |
| |
| # we expect an exception to be raised because 2 weights cannot |
| # fill the shapes (2,1,1,1) and (1,), which need 3 values in total |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.deserialize_as_nd_weights(invalid_model_weights.tostring(), |
| dummy_model_shape) |
| |
| invalid_model_weights = np.array([1,2,3,4], dtype=np.float32) |
| dummy_model_shape = [(2, 2, 3, 1), (1,)] |
| # we expect an exception to be raised because 4 weights cannot |
| # fill the shapes (2,2,3,1) and (1,), which need 13 values in total |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.deserialize_as_nd_weights(invalid_model_weights.tostring(), |
| dummy_model_shape) |
| |
| invalid_model_weights = np.array([0,1,2,3,4], dtype=np.float32) |
| dummy_model_shape = [(2, 1), (1,)] |
| # we expect an exception to be raised because 5 weights do not |
| # match the shapes (2,1) and (1,), which need only 3 values in total |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.deserialize_as_nd_weights(invalid_model_weights.tostring(), |
| dummy_model_shape) |
| |
| def test_serialize_image_nd_weights_none_weights_returns_none(self): |
| res = self.subject.serialize_state_with_nd_weights(0, None) |
| self.assertEqual(None, res) |
| |
| def test_serialize_image_nd_weights_valid_output(self): |
| res = self.subject.serialize_state_with_nd_weights(0, [np.array([1, 3]), |
| np.array([4,5])]) |
| self.assertEqual(np.array([0,1,3,4,5], dtype=np.float32).tostring(), |
| res) |
| |
| def test_serialize_image_1d_weights_none_weights_returns_none(self): |
| res = self.subject.serialize_state_with_1d_weights(0, None) |
| self.assertEqual(None, res) |
| |
| def test_serialize_image_1d_weights_valid_output(self): |
| res = self.subject.serialize_state_with_1d_weights(0, np.array([1, 3, 4, 5])) |
| self.assertEqual(np.array([0,1,3,4,5], dtype=np.float32).tostring(), |
| res) |
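| |
| def test_serialize_then_deserialize_1d_weights_roundtrip(self): |
| # A minimal round-trip sketch, assuming (as the tests above suggest) |
| # that the state is one flat float32 buffer with the image count |
| # stored in front of the weights. |
| weights = np.array([1, 3, 4, 5], dtype=np.float32) |
| state = self.subject.serialize_state_with_1d_weights(7, weights) |
| res = self.subject.deserialize_as_image_1d_weights(state) |
| self.assertEqual(7, res[0]) |
| self.assertEqual(weights.tolist(), res[1].tolist()) |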
| |
| |
| class MadlibKerasHelperTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras_helper |
| self.subject = madlib_keras_helper |
| self.input_data = [32, 32, 3] |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
| def test_expand_input_dims(self): |
| self.assertEqual(np.array(self.input_data).shape, (3,)) |
| res = self.subject.expand_input_dims(self.input_data) |
| self.assertEqual(res.shape, (1, 3)) |
| |
| def test_strip_trailing_nulls_from_class_values(self): |
| self.assertEqual(['cat', 'dog'], |
| self.subject.strip_trailing_nulls_from_class_values( |
| ['cat', 'dog'])) |
| self.assertEqual([None, 'cat', 'dog'], |
| self.subject.strip_trailing_nulls_from_class_values( |
| [None, 'cat', 'dog'])) |
| self.assertEqual([None, 'cat', 'dog'], |
| self.subject.strip_trailing_nulls_from_class_values( |
| [None, 'cat', 'dog', None, None])) |
| self.assertEqual(['cat', 'dog'], |
| self.subject.strip_trailing_nulls_from_class_values( |
| ['cat', 'dog', None, None])) |
| self.assertEqual([None], |
| self.subject.strip_trailing_nulls_from_class_values( |
| [None, None])) |
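| # Note: only *trailing* NULLs are stripped; leading and interior NULLs |
| # are preserved, and an all-NULL list keeps a single None. |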
| |
| def test_get_gpus_per_one_seg_gpu_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| |
| self.plpy_mock_execute.side_effect = \ |
| [ [], |
| [ {'hostname': 'mdw0', 'count' : 1}], |
| [], |
| [ {'hostname': 'mdw0', 'segment_id' : 0}, |
| {'hostname': 'mdw1', 'segment_id' : 1}, |
| {'hostname': 'mdw2', 'segment_id' : 2} |
| ]] |
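| # The mocked results describe a cluster where host mdw0 reports one |
| # GPU and segments 0, 1, 2 run on mdw0, mdw1, mdw2 respectively, so |
| # only segment 0 should see a GPU. |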
| |
| self.assertEqual([1,0,0], self.subject.get_accessible_gpus_for_seg( |
| 'schema_madlib', 2, 'foo')) |
| |
| def test_get_gpus_per_mult_seg_gpu_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| |
| self.plpy_mock_execute.side_effect = \ |
| [[], |
| [ {'hostname': 'mdw0', 'count' : 1}], |
| [], |
| [ {'hostname': 'mdw0', 'segment_id' : 0}, |
| {'hostname': 'mdw0', 'segment_id' : 1}, |
| {'hostname': 'mdw1', 'segment_id' : 2}, |
| {'hostname': 'mdw1', 'segment_id' : 3} |
| ]] |
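| # Two segments per host here: both segments on mdw0 see its GPU, |
| # while the segments on mdw1 see none. |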
| |
| self.assertEqual([1,1,0,0], self.subject.get_accessible_gpus_for_seg( |
| 'schema_madlib', 2, 'foo')) |
| |
| def test_get_gpus_per_no_gpu_gpdb(self): |
| self.subject.is_platform_pg = Mock(return_value = False) |
| |
| self.plpy_mock_execute.side_effect = [[],[],[]] |
| with self.assertRaises(plpy.PLPYException) as error: |
| self.subject.get_accessible_gpus_for_seg('schema_madlib', 2, 'foo') |
| self.assertIn('no gpus configured on hosts', str(error.exception).lower()) |
| |
| def test_get_metrics_sql_string(self): |
| NaN = float('nan') |
| test_metrics = [ 3.2, NaN, 0.0, None, 8.94, -6.8, 1.2, NaN, NaN ] |
| |
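| # Looking up py2sql[NaN] works below even though NaN != NaN: dict key |
| # lookups check identity before equality, and every NaN in test_metrics |
| # is this same float object. |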
| py2sql = { x : str(x) for x in test_metrics if type(x) == float } |
| py2sql[NaN] = "'NaN'::DOUBLE PRECISION" |
| py2sql[None] = 'NULL' |
| correct = [ py2sql[x] for x in test_metrics ] |
| |
| final, metrics = self.subject.get_metrics_sql_string(test_metrics) |
| |
| m = re.match(r"\(ARRAY\[(.*)\]\)", metrics) |
| answers = m.group(1).split(', ') |
| self.assertListEqual(answers, correct) |
| self.assertEqual(final, '(' + correct[-1] + ')') |
| |
| # Check that the postgresql parser accepts this as a valid expression |
| res = pglint('SELECT ' + final, True) |
| self.assertEqual( |
| res, |
| (True, ''), |
| "This is not valid PostgreSQL: {}".format('SELECT ' + final) |
| ) |
| |
| # Check that postgresql parser sees this as a valid array |
| res = pglint('SELECT {}[1]'.format(metrics), True) |
| self.assertEqual( |
| res, |
| (True, ''), |
| "This is not valid PostgresSQL: SELECT {}[1]".format(metrics) |
| ) |
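| |
| def test_get_metrics_sql_string_null_rendering(self): |
| # A minimal sketch, assuming the rendering pinned down above: None |
| # becomes NULL and the final clause wraps the last list element. |
| final, metrics = self.subject.get_metrics_sql_string([1.0, None]) |
| self.assertEqual(final, '(NULL)') |
| m = re.match(r"\(ARRAY\[(.*)\]\)", metrics) |
| self.assertEqual(m.group(1).split(', '), ['1.0', 'NULL']) |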
| |
| class MadlibKerasEvaluationMergeFinalTestCase(unittest.TestCase): |
| def setUp(self): |
| self.plpy_mock = Mock(spec='error') |
| patches = { |
| 'plpy': plpy, |
| 'utilities.mean_std_dev_calculator': Mock() |
| } |
| |
| self.plpy_mock_execute = MagicMock() |
| plpy.execute = self.plpy_mock_execute |
| |
| self.module_patcher = patch.dict('sys.modules', patches) |
| self.module_patcher.start() |
| import madlib_keras |
| self.subject = madlib_keras |
| |
| self.model = Sequential() |
| self.model.add(Conv2D(2, kernel_size=(1, 1), activation='relu', |
| input_shape=(1,1,1,), padding='same')) |
| self.model.add(Flatten()) |
| |
| self.compile_params = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']" |
| self.model_weights = [3,4,5,6] |
| self.serialized_weights = np.array(self.model_weights, dtype='float32' |
| ).tostring() |
| self.loss = 0.5947071313858032 |
| self.accuracy = 1.0 |
| self.dist_key_mapping = [0,1,2] |
| self.accessible_gpus_for_seg = [0] |
| |
| self.independent_var_real = [[[[0.5]]]] * 10 |
| self.dependent_var_int = [[0,1]] * 10 |
| |
| # Params as bytea |
| self.independent_var = np.array(self.independent_var_real, dtype=np.float32).tobytes() |
| self.dependent_var = np.array(self.dependent_var_int, dtype=np.int16).tobytes() |
| |
| self.independent_var_shape = [10,1,1,1] |
| self.dependent_var_shape = [10,2] |
| # We test on segment 0, which has 3 buffers filled with 10 identical |
| # images each, or 30 images total |
| self.total_images_per_seg = [3*len(self.dependent_var_int),20,40] |
| |
| def tearDown(self): |
| self.module_patcher.stop() |
| |
| def test_internal_keras_eval_merge(self): |
| image_count = self.total_images_per_seg[0] |
| state1 = [3.0*self.loss, 3.0*self.accuracy, image_count] |
| state2 = [2.0*self.loss, 2.0*self.accuracy, image_count+30] |
| merged_state = self.subject.internal_keras_eval_merge(state1,state2) |
| agg_loss = merged_state[0] |
| agg_accuracy = merged_state[1] |
| image_count_total = merged_state[2] |
| |
| self.assertEqual( 2*image_count+30 , image_count_total ) |
| self.assertAlmostEqual( 5.0*self.loss, agg_loss, 2) |
| self.assertAlmostEqual( 5.0*self.accuracy, agg_accuracy, 2) |
| |
| def test_internal_keras_eval_merge_none_first(self): |
| image_count = self.total_images_per_seg[0] |
| input_state = [self.loss, self.accuracy, image_count] |
| merged_state = self.subject.internal_keras_eval_merge(None, input_state) |
| agg_loss = merged_state[0] |
| agg_accuracy = merged_state[1] |
| image_count_total = merged_state[2] |
| |
| self.assertEqual(image_count, image_count_total) |
| self.assertAlmostEqual(self.loss, agg_loss, 2) |
| self.assertAlmostEqual(self.accuracy, agg_accuracy, 2) |
| |
| def test_internal_keras_eval_merge_none_second(self): |
| image_count = self.total_images_per_seg[0] |
| input_state = [self.loss, self.accuracy, image_count] |
| merged_state = self.subject.internal_keras_eval_merge(input_state, None) |
| agg_loss = merged_state[0] |
| agg_accuracy = merged_state[1] |
| image_count_total = merged_state[2] |
| |
| self.assertEqual(image_count, image_count_total) |
| self.assertAlmostEqual(self.loss, agg_loss, 2) |
| self.assertAlmostEqual(self.accuracy, agg_accuracy, 2) |
| |
| def test_internal_keras_eval_merge_both_none(self): |
| result = self.subject.internal_keras_eval_merge(None,None) |
| self.assertEqual(None, result) |
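| |
| def test_internal_keras_eval_merge_is_elementwise_sum(self): |
| # A sketch of the merge semantics implied by the tests above: merging |
| # two non-None states sums each component element-wise. |
| state1 = [1.0, 2.0, 3] |
| state2 = [10.0, 20.0, 30] |
| merged = self.subject.internal_keras_eval_merge(state1, state2) |
| self.assertAlmostEqual(11.0, merged[0], 2) |
| self.assertAlmostEqual(22.0, merged[1], 2) |
| self.assertEqual(33, merged[2]) |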
| |
| def test_internal_keras_eval_final(self): |
| image_count = self.total_images_per_seg[0] |
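| # input_state carries image-count-weighted running sums, so the final |
| # step should divide them back out into per-image averages. |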
| input_state = [image_count*self.loss, image_count*self.accuracy, image_count] |
| |
| output_state = self.subject.internal_keras_eval_final(input_state) |
| self.assertEqual(len(output_state), 2) |
| agg_loss = output_state[0] |
| agg_accuracy = output_state[1] |
| |
| self.assertAlmostEqual(self.loss, agg_loss,2) |
| self.assertAlmostEqual(self.accuracy, agg_accuracy,2) |
| |
| def test_internal_keras_eval_final_none(self): |
| result = self.subject.internal_keras_eval_final(None) |
| self.assertEqual(result, None) |
| |
| def test_internal_keras_eval_final_image_count_zero(self): |
| input_state = [0, 0, 0] |
| |
| with self.assertRaises(plpy.PLPYException): |
| self.subject.internal_keras_eval_final(input_state) |
| |
| |
| if __name__ == '__main__': |
| # Do not move any of the tensorflow imports outside of this block: |
| # importing tensorflow.keras.models/layers changes the `__name__` |
| # variable, so the condition above would fail and the unit tests would |
| # never run. |
| |
| # turn off verbose output |
| os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' |
| import tensorflow as tf |
| tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) |
| |
| from tensorflow.keras.models import * |
| from tensorflow.keras.layers import * |
| unittest.main() |
| # --------------------------------------------------------------------- |