DL: Check for NULL in class_values for fit_multiple_model

For data whose dependent variable (y) is already one-hot encoded, the
input preprocessor sets class_values to NULL. Prior to this commit, the
function `madlib_keras_fit_multiple_model()` assumed that class_values
would always be present and failed in case class_values was passed in as
NULL. This commit fixes this issue and adds a dev-check test for the
same.

Also, this commit updates the `validation_table` to be set to NULL instead
of the string 'None' in the summary table.

Co-authored-by: Ekta Khanna <ekhanna@pivotal.io>
diff --git a/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in b/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in
index d638843..1f833eb 100644
--- a/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in
+++ b/src/ports/postgres/modules/deep_learning/input_data_preprocessor.py_in
@@ -272,7 +272,7 @@
         self._create_output_summary_table()
 
     def _create_output_summary_table(self):
-        class_level_str='NULL::TEXT'
+        class_level_str='NULL::{0}[]'.format(self.dependent_vartype)
         if self.dependent_levels:
             # Update dependent_levels to include NULL when
             # num_classes > len(self.dependent_levels)
@@ -285,7 +285,7 @@
                 self.dependent_levels, array_type=self.dependent_vartype,
                 long_format=True)
         if self.num_classes is None:
-            self.num_classes = 'NULL'
+            self.num_classes = 'NULL::INTEGER'
         query = """
             CREATE TABLE {self.output_summary_table} AS
             SELECT
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 0188106..3ae4c9d 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -333,7 +333,15 @@
         independent_varname = \
             src_summary_dict['independent_varname_in_source_table']
         norm_const = src_summary_dict['norm_const']
-        num_classes = len(class_values)
+        self.validation_table = 'NULL' if self.validation_table is None \
+            else '$MAD${0}$MAD$'.format(self.validation_table)
+        if class_values is None:
+            class_values_str = 'NULL::{0}'.format(src_summary_dict['class_values_type'])
+            num_classes = 'NULL'
+        else:
+            class_values_str = 'ARRAY{0}::{1}'.format(class_values,
+                                                      src_summary_dict['class_values_type'])
+            num_classes = len(class_values)
         class_values_colname = CLASS_VALUES_COLNAME
         dependent_vartype_colname = DEPENDENT_VARTYPE_COLNAME
         normalizing_const_colname = NORMALIZING_CONST_COLNAME
@@ -342,7 +350,9 @@
                 CREATE TABLE {self.model_summary_table} AS
                 SELECT
                     $MAD${self.source_table}$MAD$::TEXT AS source_table,
-                    $MAD${self.validation_table}$MAD$::TEXT AS validation_table,
+                    {self.validation_table}::TEXT AS validation_table,
+                    $MAD${self.model_output_table}$MAD$::TEXT AS model,
+                    $MAD${self.model_info_table}$MAD$::TEXT AS model_info,
                     $MAD${dependent_varname}$MAD$::TEXT AS dependent_varname,
                     $MAD${independent_varname}$MAD$::TEXT AS independent_varname,
                     $MAD${self.model_arch_table}$MAD$::TEXT AS model_arch_table,
@@ -351,7 +361,7 @@
                     '{self.end_training_time}'::TIMESTAMP AS end_training_time,
                     '{self.version}'::TEXT AS madlib_version,
                     {num_classes}::INTEGER AS num_classes,
-                    ARRAY{class_values}::TEXT[] AS {class_values_colname},
+                    {class_values_str} AS {class_values_colname},
                     $MAD${dep_vartype}$MAD$::TEXT AS {dependent_vartype_colname},
                     {norm_const}::{float32_sql_type} AS {normalizing_const_colname}
             """.format(**locals())
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_iris.setup.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_iris.setup.sql_in
index 066adb8..181288c 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_iris.setup.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_iris.setup.sql_in
@@ -179,6 +179,11 @@
 (149,ARRAY[6.2,3.4,5.4,2.3],'Iris-virginica'),
 (150,ARRAY[5.9,3.0,5.1,1.8],'Iris-virginica');
 
+DROP TABLE IF EXISTS iris_data_one_hot_encoded;  -- keep the setup re-runnable like the other fixtures
+CREATE TABLE iris_data_one_hot_encoded as select id, attributes, ARRAY[class_text is not distinct from 'Iris-setosa', class_text is not distinct from 'Iris-versicolor', class_text is not distinct from 'Iris-virginica']::int[] as class_one_hot_encoded
+from iris_data;
+
+
 DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary;
 SELECT training_preprocessor_dl('iris_data',         -- Source table
                                 'iris_data_packed',  -- Output table
@@ -186,6 +191,13 @@
                                 'attributes'         -- Independent variable
                                 );
 
+DROP TABLE IF EXISTS iris_data_one_hot_encoded_packed, iris_data_one_hot_encoded_packed_summary;
+SELECT training_preprocessor_dl('iris_data_one_hot_encoded',         -- Source table
+                                'iris_data_one_hot_encoded_packed',  -- Output table
+                                'class_one_hot_encoded',        -- Dependent variable
+                                'attributes'         -- Independent variable
+           );
+
 DROP TABLE IF EXISTS iris_model_arch;
 -- NOTE: The seed is set to 0 for every layer.
 SELECT load_keras_model('iris_model_arch',  -- Output table,
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
index c061378..2a20467 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_model_selection.sql_in
@@ -180,6 +180,35 @@
     ]
 );
 
+-- Test for one-hot encoded input data
+DROP TABLE if exists iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
+SELECT madlib_keras_fit_multiple_model(
+	'iris_data_one_hot_encoded_packed',
+	'iris_multiple_model',
+	'mst_table_4row',
+	3,
+	0
+);
+
+SELECT assert(
+        model_arch_table = 'iris_model_arch' AND
+        validation_table is NULL AND
+        model_info = 'iris_multiple_model_info' AND
+        source_table = 'iris_data_one_hot_encoded_packed' AND
+        model = 'iris_multiple_model' AND
+        dependent_varname = 'class_one_hot_encoded' AND
+        independent_varname = 'attributes' AND
+        madlib_version is NOT NULL AND
+        num_iterations = 3 AND
+        start_training_time < now() AND
+        end_training_time < now() AND
+        dependent_vartype = 'integer[]' AND
+        num_classes IS NULL AND   -- must use IS NULL because = NULL evaluates to NULL and is never TRUE
+        class_values IS NULL AND  -- one-hot encoded labels carry no class list in the summary
+        normalizing_const = 1,
+        'Keras Fit Multiple Output Summary Validation failed when user passes in 1-hot encoded label vector. Actual:' || __to_char(summary))
+FROM (SELECT * FROM iris_multiple_model_summary) summary;
+
 -- Test when number of configs(3) equals number of segments(3)
 DROP TABLE IF EXISTS iris_multiple_model, iris_multiple_model_summary, iris_multiple_model_info;
 SELECT setseed(0);
@@ -188,18 +217,25 @@
 	'iris_multiple_model',
 	'mst_table',
 	6,
-	0
+	0,
+	'iris_data_one_hot_encoded_packed'
 );
 
 SELECT assert(
-        model_arch_table = 'iris_model_arch' AND
         source_table = 'iris_data_packed' AND
+        validation_table = 'iris_data_one_hot_encoded_packed' AND
+        model = 'iris_multiple_model' AND
+        model_info = 'iris_multiple_model_info' AND
         dependent_varname = 'class_text' AND
         independent_varname = 'attributes' AND
-        madlib_version is NOT NULL AND
+        model_arch_table = 'iris_model_arch' AND
         num_iterations = 6 AND
+        start_training_time < now() AND
+        end_training_time < now() AND
+        madlib_version is NOT NULL AND
         num_classes = 3 AND
         class_values = '{Iris-setosa,Iris-versicolor,Iris-virginica}' AND
+        dependent_vartype LIKE '%char%' AND
         normalizing_const = 1,
         'Keras Fit Multiple Output Summary Validation failed. Actual:' || __to_char(summary))
 FROM (SELECT * FROM iris_multiple_model_summary) summary;
@@ -217,6 +253,10 @@
         training_loss_final  >= 0  AND
         array_upper(training_metrics, 1) = 6 AND
         array_upper(training_loss, 1) = 6 AND
+        validation_metrics_final >= 0  AND
+        validation_loss_final  >= 0  AND
+        array_upper(validation_metrics, 1) = 6 AND
+        array_upper(validation_loss, 1) = 6 AND
         array_upper(metrics_elapsed_time, 1) = 6,
         'Keras Fit Multiple Output Info Validation failed. Actual:' || __to_char(info))
 FROM (SELECT * FROM iris_multiple_model_info) info;