MLP: Fix bug in weights argument

JIRA: MADLIB-1471

When passing in the weights param as a column in the table, it would
fail with the error `column does not exist`. This column was missed when
calling the step function. If it were passed in as a constant value,
this would work fine. We fixed this by adding the weights column to the
normalized/scaled table.

Additionally, there was a bug in validating the
weights column type: it validated only integer and float types and
would fail for other numeric types. The intended column type was any
numeric type.
This commit fixes both these bugs and adds tests for
passing in weights as a table column.

Co-authored-by: Ekta Khanna <ekhanna@vmware.com>
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 956ea4c..329a426 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -415,7 +415,8 @@
                                         schema_madlib=args["schema_madlib"],
                                         x_mean_table=args["x_mean_table"],
                                         y_mean_table='',
-                                        grouping_col=args["grouping_col"])
+                                        grouping_col=args["grouping_col"],
+                                        weights=args["weights"])
     else:
         # When no grouping_col is defined, the mean and std for 'x'
         # can be defined using strings, stored in x_mean_str, x_std_str.
@@ -440,7 +441,8 @@
                                x_std_str=x_std_str,
                                y_mean='',
                                y_std='',
-                               grouping_col=args["grouping_col"])
+                               grouping_col=args["grouping_col"],
+                               weights=args["weights"])
 
     return None
 # ------------------------------------------------------------------------
@@ -735,10 +737,8 @@
                 "MLP Error: The input weights param is not supported with"
                 " mini-batch version of MLP.")
     else:
-        int_types = ['integer', 'smallint', 'bigint']
-        float_types = ['double precision', 'real']
-        _assert(get_expr_type(weights, source_table) in int_types + float_types,
-                "MLP error: Weights should be a numeric type")
+        _assert(is_valid_psql_type(get_expr_type(weights, source_table), NUMERIC),
+                "MLP error: Weights should be a numeric type")
         _assert(array_col_has_same_dimension(source_table, independent_varname),
                 "Independent variable column should refer to arrays of the same length")
 
diff --git a/src/ports/postgres/modules/convex/test/mlp.sql_in b/src/ports/postgres/modules/convex/test/mlp.sql_in
index 9053df7..056179a 100644
--- a/src/ports/postgres/modules/convex/test/mlp.sql_in
+++ b/src/ports/postgres/modules/convex/test/mlp.sql_in
@@ -308,6 +308,63 @@
     'mlp_prediction_output',
     'output');
 
+-- Test for passing NUMERIC row_weights with grouping
+DROP TABLE IF EXISTS iris_data_row_weight;
+CREATE TABLE iris_data_row_weight
+  AS SELECT *, id::NUMERIC AS row_weight FROM iris_data;
+DROP TABLE IF EXISTS mlp_class, mlp_class_summary, mlp_class_standardization;
+SELECT mlp_classification(
+    'iris_data_row_weight',    -- Source table
+    'mlp_class',    -- Destination table
+    'attributes',   -- Input features
+    'class',        -- Label
+    ARRAY[5],   -- Number of units per layer
+    'learning_rate_init=0.1,
+    learning_rate_policy=constant,
+    n_iterations=5,
+    n_tries=3,
+    tolerance=0',
+    'sigmoid',
+    'row_weight',
+    False,
+    False,
+    'grp'
+);
+DROP TABLE IF EXISTS mlp_prediction_output;
+SELECT mlp_predict(
+    'mlp_class',
+    'iris_data',
+    'id',
+    'mlp_prediction_output',
+    'output');
+
+-- Test for passing NUMERIC row_weights without grouping
+DROP TABLE IF EXISTS iris_data_row_weight;
+CREATE TABLE iris_data_row_weight
+  AS SELECT *, id::NUMERIC AS row_weight FROM iris_data;
+DROP TABLE IF EXISTS mlp_class, mlp_class_summary, mlp_class_standardization;
+SELECT mlp_classification(
+    'iris_data_row_weight',    -- Source table
+    'mlp_class',    -- Destination table
+    'attributes',   -- Input features
+    'class',        -- Label
+    ARRAY[5],   -- Number of units per layer
+    'learning_rate_init=0.1,
+    learning_rate_policy=constant,
+    n_iterations=5,
+    n_tries=3,
+    tolerance=0',
+    'sigmoid',
+    'row_weight'
+);
+DROP TABLE IF EXISTS mlp_prediction_output;
+SELECT mlp_predict(
+    'mlp_class',
+    'iris_data',
+    'id',
+    'mlp_prediction_output',
+    'output');
+
 -- minibatch without grouping and without warm_start
 DROP TABLE IF EXISTS mlp_class_batch, mlp_class_batch_summary, mlp_class_batch_standardization;
 SELECT mlp_classification(
@@ -1006,6 +1063,66 @@
              )
 FROM mlp_prediction_regress  LIMIT 1;
 
+-- with weights with grouping without minibatch  without warm start
+CREATE TABLE lin_housing_wi_with_row_weight AS SELECT *, (id%3) +1 AS row_weight FROM lin_housing_wi;
+DROP TABLE IF EXISTS mlp_regress, mlp_regress_summary, mlp_regress_standardization;
+SELECT mlp_regression(
+               'lin_housing_wi_with_row_weight',           -- Source table
+               'mlp_regress',              -- Destination table
+               'x',                        -- Input features
+               'y',                        -- Dependent variable
+               ARRAY[40],                 -- Number of units per layer
+               'learning_rate_init=0.015,
+               learning_rate_policy=inv,
+               n_iterations=5, n_tries=3,
+               tolerance=0',
+               'sigmoid',
+               'row_weight',
+               False,
+               False,
+               'grp');
+DROP TABLE IF EXISTS mlp_prediction_regress;
+SELECT mlp_predict(
+               'mlp_regress',
+               'lin_housing_wi',
+               'id',
+               'mlp_prediction_regress',
+               'output');
+SELECT assert(
+                   __to_char(pg_typeof(estimated_y)) = 'double precision[]',
+                   'Estimated y should be an array. Actual ' || __to_char(pg_typeof(estimated_y))
+           )
+FROM mlp_prediction_regress  LIMIT 1;
+
+-- with weights without grouping without minibatch  without warm start
+DROP TABLE IF EXISTS mlp_regress, mlp_regress_summary, mlp_regress_standardization;
+SELECT mlp_regression(
+               'lin_housing_wi_with_row_weight',           -- Source table
+               'mlp_regress',              -- Destination table
+               'x',                        -- Input features
+               'y',                        -- Dependent variable
+               ARRAY[40],                 -- Number of units per layer
+               'learning_rate_init=0.015,
+               learning_rate_policy=inv,
+               n_iterations=5, n_tries=3,
+               tolerance=0',
+               'sigmoid',
+               'row_weight',
+               False,
+               False);
+DROP TABLE IF EXISTS mlp_prediction_regress;
+SELECT mlp_predict(
+               'mlp_regress',
+               'lin_housing_wi',
+               'id',
+               'mlp_prediction_regress',
+               'output');
+SELECT assert(
+                   __to_char(pg_typeof(estimated_y)) = 'double precision[]',
+                   'Estimated y should be an array. Actual ' || __to_char(pg_typeof(estimated_y))
+           )
+FROM mlp_prediction_regress  LIMIT 1;
+
 -- minibatch without grouping and without warm start
 DROP TABLE IF EXISTS mlp_regress_batch, mlp_regress_batch_summary, mlp_regress_batch_standardization;
 SELECT mlp_regression(
diff --git a/src/ports/postgres/modules/convex/utils_regularization.py_in b/src/ports/postgres/modules/convex/utils_regularization.py_in
index 6ed98bf..2b9a279 100644
--- a/src/ports/postgres/modules/convex/utils_regularization.py_in
+++ b/src/ports/postgres/modules/convex/utils_regularization.py_in
@@ -239,6 +239,7 @@
         y_mean_join_clause = "INNER JOIN {0} AS __y__ ON {1}".format(
             kwargs.get('y_mean_table'), group_where_y)
     ydecenter_str = "- __y__.mean".format(**kwargs) if y_decenter else ""
+    weights_str = ", {weights}".format(**kwargs) if 'weights' in kwargs else ""
     plpy.execute("""
             CREATE TEMP TABLE {tbl_data_scaled}
             m4_ifdef(`__POSTGRESQL__', `', `WITH (appendonly=true)')
@@ -250,6 +251,7 @@
                     AS {col_ind_var_norm_new},
                 ({col_dep_var} {ydecenter_str})  AS {col_dep_var_norm_new},
                 {select_grouping_cols}
+                {weights_str}
             FROM {tbl_data}
             {x_mean_join_clause}
             {y_mean_join_clause}
@@ -257,6 +259,7 @@
                    x_mean_join_clause=x_mean_join_clause,
                    y_mean_join_clause=y_mean_join_clause,
                    select_grouping_cols=select_grouping_cols,
+                   weights_str=weights_str,
                    **kwargs))
     return None
 # ========================================================================
@@ -286,6 +289,7 @@
                        to be compatible with array[...] expressions
     """
     ydecenter_str = "- {y_mean}".format(**kwargs) if y_decenter else ""
+    weights_str = ", {weights}".format(**kwargs) if 'weights' in kwargs else ""
     plpy.execute(
         """
             CREATE TEMP TABLE {tbl_data_scaled}
@@ -298,8 +302,11 @@
                         '{x_std_str}'::double precision[]))
                     AS {col_ind_var_norm_new},
                 ({col_dep_var} {ydecenter_str})  AS {col_dep_var_norm_new}
+                {weights_str}
             FROM {tbl_data}
-        """.format(ydecenter_str=ydecenter_str, **kwargs))
+        """.format(ydecenter_str=ydecenter_str,
+                   weights_str=weights_str,
+                   **kwargs))
 
     return None
 # ========================================================================