Disallow 'deserialize', 'serialize', & 'get' loss and metrics
Also:
- Remove whitelisting of any metrics containing the string
"top_k_categorical_accuracy". This is already a builtin metric, and
would compromise security if we allowed arbitrary python code
containing this string to be passed along to keras.
- Remove elements which start with an underscore from list of builtins.
- Avoid using metrics[2:-2] which assumes first 2 characters are [' or
[" and '] or "]. This prevents sneaky inputs like metrics=[*__builtins__ ]
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
index 32a5757..f2f06d6 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
@@ -168,13 +168,26 @@
sql = "DROP TABLE {0}".format(object_table)
plpy.execute(sql, 0)
+dangerous_builtins = set(('serialize', 'deserialize', 'get'))
+
def update_builtin_metrics(builtin_metrics):
builtin_metrics.append('accuracy')
builtin_metrics.append('acc')
builtin_metrics.append('crossentropy')
builtin_metrics.append('ce')
+
+ builtin_metrics = [ b for b in builtin_metrics \
+ if not b.startswith('_') and \
+ b not in dangerous_builtins ]
+
return builtin_metrics
+def update_builtin_losses(builtin_losses):
+ builtin_losses = [ b for b in builtin_losses \
+ if not b.startswith('_') and \
+ b not in dangerous_builtins ]
+ return builtin_losses
+
@MinWarning("error")
def load_top_k_accuracy_function(schema_madlib, object_table, k, **kwargs):
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 2db346e..aa88fbe 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -353,7 +353,7 @@
DEBUG.print_timing('eval_model_total')
def populate_object_map(self):
- builtin_losses = dir(losses)
+ builtin_losses = update_builtin_losses(dir(losses))
builtin_metrics = update_builtin_metrics(dir(metrics))
# Track distinct custom functions in compile_params
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
index ab8d336..de5c63d 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
@@ -49,6 +49,8 @@
from utilities.validate_args import output_tbl_valid
from madlib_keras_wrapper import parse_and_validate_fit_params
from madlib_keras_wrapper import parse_and_validate_compile_params
+from madlib_keras_custom_function import update_builtin_metrics
+from madlib_keras_custom_function import update_builtin_losses
import tensorflow.keras.losses as losses
import tensorflow.keras.metrics as metrics
@@ -541,18 +543,18 @@
""".format(fit_params, str(e)))
if not self.compile_params_list:
plpy.error( "compile_params_list cannot be NULL")
- custom_fn_name = []
- ## Initialize builtin loss/metrics functions
- builtin_losses = dir(losses)
- builtin_metrics = dir(metrics)
- # Default metrics, since it is not part of the builtin metrics list
- builtin_metrics.append('accuracy')
+ custom_fn_names = []
+
+ # Initialize builtin loss/metrics functions
+ builtin_losses = update_builtin_losses(dir(losses))
+ builtin_metrics = update_builtin_metrics(dir(metrics))
+
if self.object_table is not None:
res = plpy.execute("SELECT {0} from {1}".format(CustomFunctionSchema.FN_NAME,
self.object_table))
for r in res:
- custom_fn_name.append(r[CustomFunctionSchema.FN_NAME])
+ custom_fn_names.append(r[CustomFunctionSchema.FN_NAME])
for compile_params in self.compile_params_list:
try:
_, _, res = parse_and_validate_compile_params(compile_params)
@@ -563,11 +565,11 @@
if self.object_table is not None:
error_suffix = "is not defined in object table '{0}'!".format(self.object_table)
- _assert(res['loss'] in custom_fn_name or res['loss'] in builtin_losses,
+ _assert(res['loss'] in custom_fn_names or res['loss'] in builtin_losses,
"custom function '{0}' used in compile params "\
"{1}".format(res['loss'], error_suffix))
if 'metrics' in res:
- _assert((len(set(res['metrics']).intersection(custom_fn_name)) > 0
+ _assert((len(set(res['metrics']).intersection(custom_fn_names)) > 0
or len(set(res['metrics']).intersection(builtin_metrics)) > 0),
"custom function '{0}' used in compile params " \
"{1}".format(res['metrics'], error_suffix))
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index c23f8d3..e3f9f01 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -28,6 +28,7 @@
import madlib_keras_gpu_info
from madlib_keras_custom_function import CustomFunctionSchema
from madlib_keras_custom_function import update_builtin_metrics
+from madlib_keras_custom_function import update_builtin_losses
from utilities.utilities import _assert
from utilities.utilities import is_platform_pg
@@ -228,14 +229,14 @@
literal_eval_compile_params,
accepted_compile_params)
if len(additional_params) == 0:
- # optimizer is not a required parameter for keras compile
+ # optimizer is a required parameter for keras compile
_assert('optimizer' in compile_dict, "optimizer is a required parameter for compile")
opt_name, opt_args = parse_optimizer(compile_dict)
else:
opt_name, opt_args = None, None
_assert('loss' in compile_dict, "loss is a required parameter for compile")
- unsupported_loss_list = ['sparse_categorical_crossentropy']
+ unsupported_loss_list = ['sparse_categorical_crossentropy', 'serialize', 'deserialize', 'get' ]
_assert(compile_dict['loss'] not in unsupported_loss_list,
"Loss function {0} is not supported.".format(compile_dict['loss']))
validate_compile_param_types(compile_dict)
@@ -251,7 +252,10 @@
if 'metrics' in compile_dict and compile_dict['metrics']:
unsupported_metrics_list = ['sparse_categorical_accuracy',
'sparse_categorical_crossentropy',
- 'sparse_top_k_categorical_accuracy']
+ 'sparse_top_k_categorical_accuracy',
+ 'serialize',
+ 'deserialize',
+ 'get']
_assert(len(compile_dict['metrics']) == 1,
"Only one metric at a time is supported.")
_assert(compile_dict['metrics'][0] not in unsupported_metrics_list,
@@ -474,16 +478,23 @@
"""
compile_dict = convert_string_of_args_to_dict(compile_params)
- builtin_losses = dir(losses)
+ builtin_losses = update_builtin_losses(dir(losses))
builtin_metrics = update_builtin_metrics(dir(metrics))
custom_fn_list = []
+
local_loss = compile_dict['loss'].lower() if 'loss' in compile_dict else None
- local_metric = compile_dict['metrics'].lower()[2:-2] if 'metrics' in compile_dict else None
+ try:
+ metrics_list = ast.literal_eval(compile_dict['metrics']) \
+ if 'metrics' in compile_dict else []
+ except ValueError:
+ plpy.error(("Invalid input value for parameter {0}, "
+ "please refer to the documentation").format(compile_dict['metrics']))
+ local_metric = metrics_list[0].lower() if (len(metrics_list) > 0) else None
+
if local_loss and (local_loss not in [a.lower() for a in builtin_losses]):
custom_fn_list.append(local_loss)
if local_metric and (local_metric not in [a.lower() for a in builtin_metrics]):
- if 'top_k_categorical_accuracy' not in local_metric:
- custom_fn_list.append(local_metric)
+ custom_fn_list.append(local_metric)
return custom_fn_list