| # coding=utf-8 |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| """ |
| @file mean_std_dev_calculator.py_in |
| |
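@brief Helper class that computes the mean and standard deviation of the
       independent variables, either over the whole table or per group.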
| |
| @namespace utilities |
| |
| """ |
| import plpy |
| from convex.utils_regularization import utils_ind_var_scales |
| from convex.utils_regularization import utils_ind_var_scales_grouping |
| from utilities import _array_to_string |
| |
| m4_changequote(`<!', `!>') |
| |
| #TODO: use this for all the modules that calculate the std dev and mean for x |
| # mlp, pca, elastic_net |
class MeanStdDevCalculator:
    """Compute the mean and standard deviation of the independent variables.

    Thin wrapper around ``utils_ind_var_scales`` and
    ``utils_ind_var_scales_grouping`` that stores the arguments shared by
    both calls so callers only have to supply them once.
    """

    def __init__(self, schema_madlib, source_table, indep_var_array_str, dimension):
        """Store the arguments that are forwarded to the scaling utilities.

        All four values are kept verbatim on the instance; no validation is
        performed here.

        Args:
            schema_madlib: Forwarded to the scaling utilities
                (presumably the name of the MADlib schema -- confirm
                against callers).
            source_table: Forwarded to the scaling utilities
                (presumably the name of the input table).
            indep_var_array_str: Forwarded to the scaling utilities
                (presumably the SQL array expression of the independent
                variables).
            dimension: Forwarded to the scaling utilities
                (presumably the number of independent variables).
        """
        self.schema_madlib = schema_madlib
        self.source_table = source_table
        self.indep_var_array_str = indep_var_array_str
        self.dimension = dimension

    def get_mean_and_std_dev_for_ind_var(self):
        """Return the mean and std dev of the independent variables as strings.

        Calls ``utils_ind_var_scales`` without an output table and converts
        the ``"mean"`` and ``"std"`` entries of its result with
        ``_array_to_string``.

        Returns:
            A ``(x_mean_str, x_std_str)`` tuple.

        Raises:
            Reports an error through ``plpy.error`` when either converted
            string is empty/None.
        """
        x_scaled_vals = utils_ind_var_scales(self.source_table,
                                             self.indep_var_array_str,
                                             self.dimension,
                                             self.schema_madlib,
                                             # do not dump the output to a
                                             # temp table
                                             x_mean_table=None,
                                             set_zero_std_to_one=True)
        x_mean_str = _array_to_string(x_scaled_vals["mean"])
        x_std_str = _array_to_string(x_scaled_vals["std"])

        if not x_mean_str or not x_std_str:
            # Adjacent string literals are concatenated as-is, so the
            # separating space has to be part of one of the literals.
            plpy.error("mean/stddev for the independent variable "
                       "cannot be null")

        return x_mean_str, x_std_str

    def create_mean_std_table_for_ind_var_grouping(self, x_mean_table, grouping_cols):
        """Run the grouped mean/std dev computation for the independent variables.

        Delegates to ``utils_ind_var_scales_grouping`` with
        ``set_zero_std_to_one=True`` and ``create_temp_table=False``.

        Args:
            x_mean_table: Forwarded to the grouping utility (presumably the
                name of the table that receives the per-group results).
            grouping_cols: Forwarded to the grouping utility (presumably the
                grouping column expression).

        Returns:
            None. The return value of the utility call is discarded.
        """
        utils_ind_var_scales_grouping(self.source_table,
                                      self.indep_var_array_str,
                                      self.dimension,
                                      self.schema_madlib,
                                      grouping_cols,
                                      x_mean_table,
                                      set_zero_std_to_one=True,
                                      create_temp_table=False)