DL: Fix misc bugs
JIRA: MADLIB-1464
1. When validating the input shape of the validation table, we were passing
the training table's independent variable name(s) to validate_input_shape
instead of the validation table's.
2. Add a "not supported" error message for multiple dependent and
independent variables in fit_multiple.
3. PredictBYOM: Re-enable the code and the unit test that validate
class_values (validate_class_values).
4. Add an error message for the case when fit or fit_multiple is called
with data that was preprocessed by an older version.
Co-authored-by: Ekta Khanna <ekhanna@vmware.com>
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 0e5b1b9..d23d765 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -337,8 +337,8 @@
# are traversed in order. It won't work for multi-io and prone to breaking
# in the regular case.
- # InputValidator.validate_class_values(
- # self.module_name, self.class_values, self.pred_type, self.model_arch)
+ InputValidator.validate_class_values(
+ self.module_name, self.class_values, self.pred_type, self.model_arch)
InputValidator.validate_input_shape(
self.test_table, self.independent_varname,
get_input_shape(self.model_arch), 1)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
index 21eff15..439d9d9 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
@@ -289,6 +289,11 @@
self.dependent_varname = self.src_summary_dict['dependent_varname']
self.independent_varname = self.src_summary_dict['independent_varname']
+ if not isinstance(self.dependent_varname, list) or \
+ not isinstance(self.independent_varname, list):
+ #TODO improve error message
+ plpy.error("Input table '{0}' has not been preprocessed properly. "
+ "Please run input preprocessor again.".format(self.source_table))
self.dep_shape_cols = [add_postfix(i, "_shape") for i in self.dependent_varname]
self.ind_shape_cols = [add_postfix(i, "_shape") for i in self.independent_varname]
@@ -406,7 +411,7 @@
input_shape, 2, True)
if self.validation_table:
InputValidator.validate_input_shape(
- self.validation_table, self.independent_varname,
+ self.validation_table, self.val_ind_var,
input_shape, 2, True)
@@ -459,11 +464,12 @@
use_gpus,
accessible_gpus_for_seg,
self.module_name,
- self.object_table,
- val_dep_var,
- val_ind_var)
- self.output_model_info_table = add_postfix(output_model_table,
- '_info')
+ self.object_table)
+ _assert(len(self.dependent_varname) == 1
+ or len(self.independent_varname) == 1,
+ "Multiple dependent and independent variables not supported "
+ "for madlib_keras_fit_multiple_model!")
+ self.output_model_info_table = add_postfix(output_model_table, '_info')
if warm_start:
input_tbl_valid(self.output_model_info_table, self.module_name)
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index 928b753..5ef4517 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -907,14 +907,14 @@
self.assertIn('invalid_pred_type', str(error.exception))
# The validation for this test has been disabled
- # with self.assertRaises(plpy.PLPYException) as error:
- # self.module.PredictBYOM('schema_madlib', 'model_arch_table',
- # 'model_id', 'test_table', 'id_col',
- # 'independent_varname', 'output_table',
- # self.pred_type, self.use_gpus,
- # ["foo", "bar", "baaz"], self.normalizing_const,
- # self.dependent_count)
- # self.assertIn('class values', str(error.exception).lower())
+ with self.assertRaises(plpy.PLPYException) as error:
+ self.module.PredictBYOM('schema_madlib', 'model_arch_table',
+ 'model_id', 'test_table', 'id_col',
+ 'independent_varname', 'output_table',
+ self.pred_type, self.use_gpus,
+ ["foo", "bar", "baaz"], self.normalizing_const,
+ self.dependent_count)
+ self.assertIn('class values', str(error.exception).lower())
with self.assertRaises(plpy.PLPYException) as error:
self.module.PredictBYOM('schema_madlib', 'model_arch_table',
@@ -1313,6 +1313,37 @@
'module_name', None)
self.assertEqual(False, obj._is_valid_metrics_compute_frequency())
+ def test_validator_dep_indep_type_not_array(self):
+ # only dep is not array
+ self.subject.FitCommonValidator.get_source_summary_table_dict = \
+ Mock(return_value={'dependent_varname':'a',
+ 'independent_varname':['b']})
+ with self.assertRaises(plpy.PLPYException) as error:
+ self.subject.FitCommonValidator(
+ 'test_table', 'val_table', 'model_table', 5, None, False, False, [0],
+ 'module_name', None)
+ self.assertIn('not been preprocessed properly', str(error.exception))
+
+ # only indep is not array
+ self.subject.FitCommonValidator.get_source_summary_table_dict = \
+ Mock(return_value={'dependent_varname':['a'],
+ 'independent_varname':'b'})
+ with self.assertRaises(plpy.PLPYException) as error:
+ self.subject.FitCommonValidator(
+ 'test_table', 'val_table', 'model_table', 5, None, False, False, [0],
+ 'module_name', None)
+ self.assertIn('not been preprocessed properly', str(error.exception))
+
+ # both indep and dep are not arrays
+ self.subject.FitCommonValidator.get_source_summary_table_dict = \
+ Mock(return_value={'dependent_varname':'a',
+ 'independent_varname':'b'})
+ with self.assertRaises(plpy.PLPYException) as error:
+ self.subject.FitCommonValidator(
+ 'test_table', 'val_table', 'model_table', 5, None, False, False, [0],
+ 'module_name', None)
+ self.assertIn('not been preprocessed properly', str(error.exception))
+
class InputValidatorTestCase(unittest.TestCase):
def setUp(self):
@@ -1391,9 +1422,9 @@
def test_validate_input_shape_shapes_match(self):
# minibatched data
- # self.plpy_mock_execute.return_value = [{'shape': [1,32,32,3]}]
- # self.subject.validate_input_shape(
- # self.test_table, [self.ind_var], [[32,32,3]], 2, True)
+ self.plpy_mock_execute.return_value = [{'shape': [1,32,32,3]}]
+ self.subject.validate_input_shape(
+ self.test_table, [self.ind_var], [[32,32,3]], 2, True)
# non-minibatched data
self.plpy_mock_execute.return_value = [{'shape': [32,32,3]}]
self.subject.validate_input_shape(