| # coding=utf-8 |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import sys |
| import numpy as np |
| from os import path |
| |
# Add modules to the pythonpath: the directories four and three levels above
# this file are appended to the module search path, in that order.
_three_levels_up = path.dirname(path.dirname(path.dirname(path.abspath(__file__))))
sys.path.extend([path.dirname(_three_levels_up), _three_levels_up])
| |
| import unittest |
| from mock import * |
| import plpy_mock as plpy |
| |
| m4_changequote(`<!', `!>') |
| |
class InputPreProcessorDLTestCase(unittest.TestCase):
    """Unit tests for deep_learning.input_data_preprocessor.

    The modules under test are imported inside setUp() while sys.modules is
    patched, so that the name plpy resolves to the plpy_mock module and
    plpy.execute is a MagicMock whose results individual tests can script.
    """

    # ------------------------------------------------------------------
    # Fixture
    # ------------------------------------------------------------------

    def setUp(self):
        """Patch sys.modules, import the modules under test, set defaults."""
        self.plpy_mock = Mock(spec='error')
        patches = {
            'plpy': plpy,
            'utilities.mean_std_dev_calculator': Mock(),
        }
        # we need to use MagicMock() instead of Mock() for the plpy.execute
        # mock to be able to iterate on the return value
        self.plpy_mock_execute = MagicMock()
        plpy.execute = self.plpy_mock_execute

        self.module_patcher = patch.dict('sys.modules', patches)
        self.module_patcher.start()
        # Register the undo step immediately: tearDown() is skipped when
        # setUp() raises (for instance if one of the imports below fails),
        # whereas registered cleanups always run.
        self.addCleanup(self.module_patcher.stop)

        self.default_schema_madlib = "madlib"
        self.default_source_table = "source"
        self.default_output_table = "output"
        self.default_dep_var = "depvar"
        self.default_ind_var = "indvar"
        self.default_buffer_size = 5
        self.default_normalizing_const = 1.0
        self.default_num_classes = None
        self.default_distribution_rules = "all_segments"
        self.default_module_name = "dummy"

        import deep_learning.input_data_preprocessor
        self.module = deep_learning.input_data_preprocessor
        import utilities.minibatch_preprocessing
        self.util_module = utilities.minibatch_preprocessing
        import utilities.control
        self.control_module = utilities.control
        self.module.get_expr_type = Mock(
            side_effect=['integer[]', 'integer[]'])
        self.module.validate_module_input_params = Mock()
        self.module.get_distinct_col_levels = Mock(return_value=[0, 22, 100])

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _input_preprocessor(self, **overrides):
        """Construct an InputDataPreprocessorDL from the default arguments.

        Each keyword argument replaces the default of the same name (for
        example buffer_size=0 or source_table="input").  The values are
        passed to the constructor positionally, in the order listed below.

        Raises:
            TypeError: if an override name does not match any default.
        """
        defaults = [
            ('schema_madlib', self.default_schema_madlib),
            ('source_table', self.default_source_table),
            ('output_table', self.default_output_table),
            ('dep_var', self.default_dep_var),
            ('ind_var', self.default_ind_var),
            ('buffer_size', self.default_buffer_size),
            ('normalizing_const', self.default_normalizing_const),
            ('num_classes', self.default_num_classes),
            ('distribution_rules', self.default_distribution_rules),
            ('module_name', self.default_module_name),
        ]
        unknown = set(overrides) - set(name for name, _ in defaults)
        if unknown:
            raise TypeError(
                'unexpected override(s): %s' % ', '.join(sorted(unknown)))
        # overrides.get() (not "or") so that an explicit None is honoured.
        return self.module.InputDataPreprocessorDL(
            *[overrides.get(name, value) for name, value in defaults])

    def _training_preprocessor(self, num_classes):
        """Construct a TrainingDataPreprocessorDL with the given num_classes.

        All other arguments are the defaults from setUp(); this constructor
        is called without a module name.
        """
        return self.module.TrainingDataPreprocessorDL(
            self.default_schema_madlib,
            self.default_source_table,
            self.default_output_table,
            self.default_dep_var,
            self.default_ind_var,
            self.default_buffer_size,
            self.default_normalizing_const,
            num_classes,
            self.default_distribution_rules)

    def _assert_construction_fails(self,
                                   expr_types=('integer[]', 'integer[]'),
                                   **overrides):
        """Assert that building the preprocessor raises plpy.PLPYException.

        expr_types are the successive values returned by the mocked
        get_expr_type; overrides are forwarded to _input_preprocessor().
        """
        self.module.get_expr_type = Mock(side_effect=list(expr_types))
        with self.assertRaises(plpy.PLPYException):
            self._input_preprocessor(**overrides)

    def _stub_optimizer_control(self):
        """Replace OptimizerControl.__enter__ with a Mock and set its flags."""
        optimizer_control_cls = self.control_module.OptimizerControl
        optimizer_control_cls.__enter__ = Mock()
        optimizer_control_cls.optimizer_control = True
        optimizer_control_cls.optimizer_enabled = True

    def _distribution_table_preprocessor(self, distribution_row):
        """Return a preprocessor ready for _validate_distribution_table().

        The object is constructed first; only afterwards are the table/column
        validators mocked and the next two plpy.execute results scripted, the
        second of which is a single row equal to distribution_row.
        """
        self.module.get_expr_type = Mock(
            side_effect=['integer[]', 'integer[]'])
        obj = self._input_preprocessor()
        self.module.input_tbl_valid = Mock()
        self.module.is_var_valid = Mock()
        self.plpy_mock_execute.side_effect = [
            [{'dbids': [2, 3, 4]}],
            [distribution_row],
        ]
        return obj

    # ------------------------------------------------------------------
    # input_preprocessor_dl()
    # ------------------------------------------------------------------

    def test_input_preprocessor_dl_executes_query(self):
        """input_preprocessor_dl() completes with an explicit buffer size."""
        self.module.get_expr_type = Mock(
            side_effect=['integer[]', 'integer[]'])
        self._stub_optimizer_control()
        preprocessor_obj = self._input_preprocessor(
            source_table="input", output_table="out")
        preprocessor_obj.input_preprocessor_dl()

    def test_input_preprocessor_null_buffer_size_executes_query(self):
        """input_preprocessor_dl() completes when buffer_size is None."""
        self.module.get_expr_type = Mock(
            side_effect=['integer[]', 'integer[]'])
        self._stub_optimizer_control()
        preprocessor_obj = self._input_preprocessor(
            source_table="input", output_table="out", buffer_size=None)
        buffer_calculator = self.util_module.MiniBatchBufferSizeCalculator
        buffer_calculator.calculate_default_buffer_size = Mock(return_value=5)
        preprocessor_obj.input_preprocessor_dl()

    # ------------------------------------------------------------------
    # Constructor validation
    # ------------------------------------------------------------------

    def test_input_preprocessor_multiple_dep_var_raises_exception(self):
        """A dependent variable of "y1,y2" is rejected."""
        self._assert_construction_fails(dep_var="y1,y2")

    def test_input_preprocessor_multiple_indep_var_raises_exception(self):
        """An independent variable of "x1,x2" is rejected."""
        self._assert_construction_fails(ind_var="x1,x2")

    def test_input_preprocessor_buffer_size_zero_fails(self):
        """A buffer size of 0 is rejected."""
        self._assert_construction_fails(buffer_size=0)

    def test_input_preprocessor_negative_buffer_size_fails(self):
        """A buffer size of -1 is rejected."""
        self._assert_construction_fails(buffer_size=-1)

    def test_input_preprocessor_invalid_indep_vartype_raises_exception(self):
        """Construction fails when get_expr_type yields 'integer' first."""
        self._assert_construction_fails(expr_types=('integer', 'integer[]'))

    def test_input_preprocessor_invalid_dep_vartype_raises_exception(self):
        """Construction fails when get_expr_type yields 'text[]' second."""
        self._assert_construction_fails(expr_types=('integer[]', 'text[]'))

    def test_input_preprocessor_normalizing_const_zero_fails(self):
        """A normalizing constant of 0 is rejected."""
        self._assert_construction_fails(normalizing_const=0)

    def test_input_preprocessor_negative_normalizing_const_fails(self):
        """A normalizing constant of -1 is rejected."""
        self._assert_construction_fails(normalizing_const=-1)

    # ------------------------------------------------------------------
    # get_one_hot_encoded_dep_var_expr()
    # ------------------------------------------------------------------

    def test_get_one_hot_encoded_dep_var_expr_null_val(self):
        """Levels NULL and 'a' yield an IS NOT DISTINCT FROM array."""
        self.module.get_expr_type = Mock(side_effect=['smallint[]', 'text'])
        self.module.get_distinct_col_levels = Mock(
            return_value=["NULL", "'a'"])
        obj = self._input_preprocessor()
        obj.dependent_levels = ["NULL", "'a'"]
        dep_var_array_expr = obj.get_one_hot_encoded_dep_var_expr()
        expected = ("array[({0}) is not distinct from null, "
                    "({0}) is not distinct from 'a']::integer[]::smallint[]"
                    ).format(self.default_dep_var)
        self.assertEqual(expected, dep_var_array_expr.lower())

    def test_get_one_hot_encoded_dep_var_expr_numeric_array_val(self):
        """An integer[] dependent variable is only cast to smallint[]."""
        self.module.get_expr_type = Mock(
            side_effect=['smallint[]', 'integer[]'])
        obj = self._input_preprocessor()
        dep_var_array_expr = obj.get_one_hot_encoded_dep_var_expr()
        self.assertEqual("{0}::smallint[]".format(self.default_dep_var),
                         dep_var_array_expr.lower())

    # ------------------------------------------------------------------
    # num_classes / padding
    # ------------------------------------------------------------------

    def test_validate_num_classes_none(self):
        """padding_size is 0 when num_classes is None."""
        self.module.get_expr_type = Mock(side_effect=['integer[]', 'text'])
        obj = self._input_preprocessor(num_classes=None)
        obj.dependent_levels = ["dummy"]
        self.assertEqual(0, obj.padding_size)

    def test_validate_num_classes_greater(self):
        """num_classes=5 gives a padding_size of 2."""
        self.module.get_expr_type = Mock(side_effect=['integer[]', 'text'])
        self.module._get_dependent_levels = Mock(
            return_value=["'a'", "'b'", "'c'"])
        obj = self._training_preprocessor(5)
        obj._set_one_hot_encoding_variables()
        self.assertEqual(2, obj.padding_size)

    def test_validate_num_classes_lesser(self):
        """num_classes=2 makes _set_one_hot_encoding_variables() raise."""
        self.module.get_expr_type = Mock(side_effect=['integer[]', 'text'])
        # NOTE(review): the sibling test above patches _get_dependent_levels
        # while this one patches dependent_levels on the module -- confirm
        # which attribute the module under test actually reads.
        self.module.dependent_levels = Mock(
            return_value=["'a'", "'b'", "'c'"])
        obj = self._training_preprocessor(2)
        with self.assertRaises(plpy.PLPYException):
            obj._set_one_hot_encoding_variables()

    # ------------------------------------------------------------------
    # _validate_distribution_table()
    # ------------------------------------------------------------------

    def test_validate_distribution_table(self):
        """dbids [3, 4] with c1 == c2 pass validation without raising."""
        obj = self._distribution_table_preprocessor(
            {'dbids': [3, 4], 'c1': 2, 'c2': 2})
        obj._validate_distribution_table()

    def test_validate_distribution_table_dup(self):
        """dbids [3, 3] raise an error whose message mentions 'duplicate'."""
        obj = self._distribution_table_preprocessor(
            {'dbids': [3, 3], 'c1': 2, 'c2': 1})
        with self.assertRaises(plpy.PLPYException) as error:
            obj._validate_distribution_table()
        # Checked after the with-block; inside it the line would never run.
        self.assertIn('duplicate', str(error.exception).lower())

    def test_validate_distribution_table_invalid(self):
        """dbids [3, 30] raise an error whose message mentions 'invalid'."""
        obj = self._distribution_table_preprocessor(
            {'dbids': [3, 30], 'c1': 2, 'c2': 2})
        with self.assertRaises(plpy.PLPYException) as error:
            obj._validate_distribution_table()
        # Checked after the with-block; inside it the line would never run.
        self.assertIn('invalid', str(error.exception).lower())
| |
| |
# Run every test case in this file when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
| |
| # --------------------------------------------------------------------- |