blob: 7de9868bb8d204ee54c834627d6e2b6a8a8c5c7b [file] [log] [blame]
# coding=utf-8
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
m4_changequote(`<!', `!>')
import sys
import os
from os import path
# Add convex module to the pythonpath.
sys.path.append(path.dirname(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))))
sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
import keras
import unittest
from mock import *
import plpy_mock as plpy
import numpy as np
class GenerateModelSelectionConfigsTestCase(unittest.TestCase):
def setUp(self):
# The side effects of this class(writing to the output table) are not
# tested here. They are tested in dev-check.
self.plpy_mock = Mock(spec='error')
patches = {
'plpy': plpy,
'utilities.mean_std_dev_calculator': Mock()
}
self.plpy_mock_execute = MagicMock()
plpy.execute = self.plpy_mock_execute
self.module_patcher = patch.dict('sys.modules', patches)
self.module_patcher.start()
import deep_learning.madlib_keras_model_selection
self.module = deep_learning.madlib_keras_model_selection
self.module.MstLoaderInputValidator = MagicMock()
self.subject = self.module.MstSearch
self.madlib_schema = 'mad'
self.model_selection_table = 'mst_table'
self.model_arch_table = 'model_arch_library'
self.model_id_list = [1, 2]
self.compile_params_grid = """
{'loss': ['categorical_crossentropy'],
'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1]} ],
'metrics': ['accuracy']}
"""
self.fit_params_grid = """
{'batch_size': [8, 32], 'epochs': [1, 2]}
"""
self.search_type = 'grid'
self.num_configs = None
self.random_state = None
self.object_table = 'custom_function_table'
def test_mst_table_dimension(self):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid
)
self.assertEqual(32, len(generate_mst.msts))
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
9,
42
)
self.assertEqual(9, len(generate_mst.msts))
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
9,
None
)
self.assertEqual(9, len(generate_mst.msts))
self.compile_params_grid = """
{'loss': ['categorical_crossentropy'],
'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1]} ],
'metrics': ['accuracy']}
"""
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
9,
None
)
self.assertEqual(9, len(generate_mst.msts))
def test_invalid_input_args(self):
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
self.search_type,
8
)
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
self.search_type,
8,
42,
self.object_table
)
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random'
)
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
None,
19
)
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
[-3],
self.compile_params_grid,
self.fit_params_grid,
'random',
None,
19
)
self.compile_params_grid = """
{'losss': ['categorical_crossentropy'],
'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], 'lr': [0.0001, 0.1]} ],
'metrics': ['accuracy']}
"""
self.fit_params_grid = """
{'batch_size': [8, 32], 'epchs': [1, 2]}
"""
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid
)
def test_duplicate_params(self):
self.model_id_list = [1, 1, 2]
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid
)
self.assertEqual(32, len(generate_mst.msts))
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
17
)
self.assertEqual(17, len(generate_mst.msts))
def test_array_inference(self):
# as literal_eval can only parse python datatypes
lr_lst = repr(list(np.random.uniform(0.001, 0.1, 3)))
self.compile_params_grid = "{'loss': ['categorical_crossentropy'], " \
"'optimizer_params_list': [ {'optimizer': ['Adam', 'SGD'], " \
"'lr': " + str(lr_lst) + "} ], " \
"'metrics': ['accuracy']}"
generate_mst1 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid
)
self.assertEqual(48, len(generate_mst1.msts))
def test_output_types(self):
generate_mst1 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'grid'
)
for d1 in generate_mst1.msts:
self.assertEqual("loss='categorical_crossentropy'" in d1['compile_params'], True)
generate_mst2 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
6,
47
)
for d2 in generate_mst2.msts:
self.assertEqual("loss='categorical_crossentropy'" in d2['compile_params'], True)
def test_seed_result_reproducibility(self):
generate_mst1 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
6,
47
)
generate_mst2 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
6,
47
)
generate_mst3 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'random',
6,
47
)
self.assertEqual(generate_mst1.msts, generate_mst2.msts, generate_mst3.msts)
def test_multiple_optimizer_configs(self):
self.compile_params_grid = """
{'loss': ['categorical_crossentropy'],
'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'], 'lr': [0.9, 0.95]},
{'optimizer': ['Adam', 'SGD']} ],
'metrics': ['accuracy']}
"""
generate_mst1 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'gr'
)
self.assertEqual(48, len(generate_mst1.msts))
self.compile_params_grid = """
{'loss': ['categorical_crossentropy'],
'optimizer_params_list': [ {'optimizer': ['Adagrad', 'Adam'], 'lr': [0.9, 0.95, 'log'],
'epsilon': [0.3, 0.5, 'log_near_one']},
{'optimizer': ['Adam', 'SGD'], 'lr': [0.6, 0.65, 'log']} ],
'metrics': ['accuracy']}
"""
generate_mst2 = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_grid,
self.fit_params_grid,
'rand',
6,
6
)
self.assertEqual(6, len(generate_mst2.msts))
def tearDown(self):
self.module_patcher.stop()
class LoadModelSelectionTableTestCase(unittest.TestCase):
def setUp(self):
# The side effects of this class(writing to the output table) are not
# tested here. They are tested in dev-check.
self.plpy_mock = Mock(spec='error')
patches = {
'plpy': plpy,
'utilities.mean_std_dev_calculator': Mock()
}
self.plpy_mock_execute = MagicMock()
plpy.execute = self.plpy_mock_execute
self.module_patcher = patch.dict('sys.modules', patches)
self.module_patcher.start()
import deep_learning.madlib_keras_model_selection
self.module = deep_learning.madlib_keras_model_selection
self.module.MstLoaderInputValidator._validate_input_args = \
MagicMock()
self.subject = self.module.MstLoader
self.madlib_schema = 'mad'
self.model_selection_table = 'mst_table'
self.model_arch_table = 'model_arch_library'
self.object_table = 'custom_function_table'
self.model_id_list = [1]
self.compile_params_list = [
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
""",
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.01)',
metrics=['accuracy']
""",
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.001)',
metrics=['accuracy']
"""
]
self.fit_params_list = [
"batch_size=5,epochs=1",
"batch_size=10,epochs=1"
]
def test_mst_table_dimension(self):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_list,
self.fit_params_list
)
self.assertEqual(6, len(generate_mst.msts))
def test_invalid_input_args(self):
self.module.MstLoaderInputValidator \
._validate_input_args \
.side_effect = plpy.PLPYException('Invalid input args')
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_list,
self.fit_params_list
)
def test_invalid_input_args_optional_param(self):
self.module.MstLoaderInputValidator \
._validate_input_args \
.side_effect = plpy.PLPYException('Invalid input args')
with self.assertRaises(plpy.PLPYException):
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_list,
self.fit_params_list,
"invalid_table"
)
def test_duplicate_params(self):
self.model_id_list = [1, 1, 2]
self.compile_params_list = [
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
""",
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
""",
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.001)',
metrics=['accuracy']
"""
]
self.fit_params_list = [
"batch_size= 5,epochs=1",
"epochs=1 ,batch_size=5",
"batch_size=10,epochs =1"
]
generate_mst = self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_id_list,
self.compile_params_list,
self.fit_params_list
)
self.assertEqual(8, len(generate_mst.msts))
def tearDown(self):
self.module_patcher.stop()
class MstLoaderInputValidatorTestCase(unittest.TestCase):
def setUp(self):
# The side effects of this class(writing to the output table) are not
# tested here. They are tested in dev-check.
self.plpy_mock = Mock(spec='error')
patches = {
'plpy': plpy,
'utilities.mean_std_dev_calculator': Mock()
}
self.plpy_mock_execute = MagicMock()
plpy.execute = self.plpy_mock_execute
self.module_patcher = patch.dict('sys.modules', patches)
self.module_patcher.start()
import deep_learning.madlib_keras_validator
self.module = deep_learning.madlib_keras_validator
self.subject = self.module.MstLoaderInputValidator
self.madlib_schema = 'mad'
self.model_selection_table = 'mst_table'
self.model_arch_table = 'model_arch_library'
self.model_arch_summary_table = 'model_arch_library_summary'
self.object_table = 'custom_function_table'
self.model_id_list = [1]
self.compile_params_list = [
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
""",
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.01)',
metrics=['accuracy']
""",
"""
loss='categorical_crossentropy',
optimizer='Adam(lr=0.001)',
metrics=['accuracy']
"""
]
self.fit_params_list = [
"batch_size=5,epochs=1",
"batch_size=10,epochs=1"
]
def test_validate_compile_params_no_custom_fn_table(self):
self.subject._validate_input_output_tables = Mock()
self.subject._validate_model_ids = Mock()
self.subject.parse_and_validate_fit_params = Mock()
self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_arch_summary_table,
self.model_id_list,
self.compile_params_list,
self.fit_params_list,
None
)
def test_test_validate_compile_params_custom_fn_table(self):
self.subject._validate_input_output_tables = Mock()
self.subject._validate_model_ids = Mock()
self.subject.parse_and_validate_fit_params = Mock()
self.plpy_mock_execute.side_effect = [[{'name': 'custom_fn1'},
{'name': 'custom_fn2'}]]
self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_arch_summary_table,
self.model_id_list,
self.compile_params_list,
self.fit_params_list,
self.object_table
)
def test_test_validate_compile_params_valid_custom_fn(self):
self.subject._validate_input_output_tables = Mock()
self.subject._validate_model_ids = Mock()
self.subject.parse_and_validate_fit_params = Mock()
self.plpy_mock_execute.side_effect = [[{'name': 'custom_fn1'},
{'name': 'custom_fn2'}]]
self.compile_params_list_valid_custom_fn = [
"""
loss='custom_fn1',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
"""
]
self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_arch_summary_table,
self.model_id_list,
self.compile_params_list_valid_custom_fn,
self.fit_params_list,
self.object_table
)
def test_test_validate_compile_params_valid_custom_fn_missing_obj_tbl(self):
self.subject._validate_input_output_tables = Mock()
self.subject._validate_model_ids = Mock()
self.subject.parse_and_validate_fit_params = Mock()
self.plpy_mock_execute.side_effect = [[{'name': 'custom_fn1'},
{'name': 'custom_fn2'}]]
self.compile_params_list_valid_custom_fn = [
"""
loss='custom_fn1',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
"""
]
with self.assertRaises(plpy.PLPYException) as error:
self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_arch_summary_table,
self.model_id_list,
self.compile_params_list_valid_custom_fn,
self.fit_params_list,
None
)
self.assertIn("object table missing", str(error.exception).lower())
def test_test_validate_compile_params_missing_loss_fn(self):
self.subject._validate_input_output_tables = Mock()
self.subject._validate_model_ids = Mock()
self.subject.parse_and_validate_fit_params = Mock()
self.plpy_mock_execute.side_effect = [[{'name': 'custom_fn1'},
{'name': 'custom_fn2'}]]
self.compile_params_list_invalid_loss_fn = [
"""
loss='invalid_loss',
optimizer='Adam(lr=0.1)',
metrics=['accuracy']
"""
]
with self.assertRaises(plpy.PLPYException) as error:
self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_arch_summary_table,
self.model_id_list,
self.compile_params_list_invalid_loss_fn,
self.fit_params_list,
self.object_table
)
self.assertIn("invalid_loss", str(error.exception).lower())
def test_test_validate_compile_params_missing_metric_fn(self):
self.subject._validate_input_output_tables = Mock()
self.subject._validate_model_ids = Mock()
self.subject.parse_and_validate_fit_params = Mock()
self.plpy_mock_execute.side_effect = [[{'name': 'custom_fn1'},
{'name': 'custom_fn2'}]]
self.compile_params_list_invalid_metric_fn = [
"""
loss='custom_fn1',
optimizer='Adam(lr=0.1)',
metrics=['invalid_metrics']
"""
]
with self.assertRaises(plpy.PLPYException) as error:
self.subject(
self.madlib_schema,
self.model_selection_table,
self.model_arch_table,
self.model_arch_summary_table,
self.model_id_list,
self.compile_params_list_invalid_metric_fn,
self.fit_params_list,
self.object_table
)
self.assertIn("invalid_metrics", str(error.exception).lower())
def tearDown(self):
self.module_patcher.stop()
if __name__ == '__main__':
unittest.main()
# ---------------------------------------------------------------------