# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Performance benchmark tests for MXNet NDArray Miscellaneous Operations.
The following 16 Miscellaneous Operators are covered:
['reset_arrays', 'multi_all_finite', 'multi_sum_sq', 'add_n', 'UpSampling', 'Custom', 'squeeze',
'all_finite', 'clip', 'multi_lars', 'SequenceReverse', 'SequenceLast', 'SequenceMask', 'cast_storage',
'cumsum', 'fill_element_0index']
"""

import mxnet as mx

from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks, run_performance_test
from benchmark.opperf.utils.op_registry_utils import get_remaining_miscellaneous_operators
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE
from benchmark.opperf.custom_operations.custom_operations import CustomAddOneProp


def run_mx_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype) for all the miscellaneous
    operators in MXNet.

Parameters
----------
ctx: mx.ctx
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
profiler: str, default 'native'
Type of Profiler to use (native/python)
int64_tensor: str, default 'off'
Input tensor size to use for tests (if on, dimensions >= 2**32)
warmup: int, default 25
Number of times to run for warmup
runs: int, default 100
        Number of runs to capture benchmark results

Returns
-------
Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
"""
standard_inputs_array_ops = [{"args": [(1024, 1024)],
"num_arrays": 1},
{"args": [(10000, 1)],
"num_arrays": 1},
{"args": [(10000, 10)],
"num_arrays": 1}]
    int64_tensor_inputs_array_ops = [{"args": [(2**32, 1)],
                                      "num_arrays": 1}]
standard_inputs_add_n = [{"args": [(1024, 1024)]},
{"args": [(10000, 1)]},
{"args": [(10000, 10)]}]
int64_tensor_inputs_add_n = [{"args": [(2**16, 2**16)]}]
standard_inputs_upsampling = [{"args": (32, 3, 256, 256),
"scale": 2,
"sample_type": "nearest"},
{"args": (32, 3, 10000, 1),
"scale": 4,
"sample_type": "nearest"}]
int64_tensor_inputs_upsampling = [{"args": (2**32 + 1, 1, 1, 1),
"scale": 2,
"sample_type": "nearest"}]
standard_inputs_custom = [{"args": [(1024, 1024)],
"op_type": "CustomAddOne"},
{"args": [(10000, 1)],
"op_type": "CustomAddOne"},
{"args": [(10000, 10)],
"op_type": "CustomAddOne"}]
int64_tensor_inputs_custom = [{"args": [(2**32 + 1, 1)],
"op_type": "CustomAddOne"}]
if int64_tensor == 'on':
inputs_array_ops = int64_tensor_inputs_array_ops
inputs_add_n = int64_tensor_inputs_add_n
inputs_upsampling = int64_tensor_inputs_upsampling
inputs_custom = int64_tensor_inputs_custom
else:
inputs_array_ops = standard_inputs_array_ops
inputs_add_n = standard_inputs_add_n
inputs_upsampling = standard_inputs_upsampling
inputs_custom = standard_inputs_custom
# Individual tests for ops with positional args
array_ops_benchmark = run_performance_test([getattr(MX_OP_MODULE, "reset_arrays"),
getattr(MX_OP_MODULE, "multi_all_finite"),
getattr(MX_OP_MODULE, "multi_sum_sq")],
run_backward=False,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=inputs_array_ops,
warmup=warmup,
runs=runs)
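    # add_n sums its input arrays element-wise; benchmarked with the backward pass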
add_n_benchmark = run_performance_test([getattr(MX_OP_MODULE, "add_n")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=inputs_add_n,
warmup=warmup,
runs=runs)
    # There are currently issues with UpSampling with bilinear interpolation;
    # the issue is tracked here: https://github.com/apache/mxnet/issues/9138
upsampling_benchmark = run_performance_test([getattr(MX_OP_MODULE, "UpSampling")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=inputs_upsampling,
warmup=warmup,
runs=runs)
    # Create the CustomAddOne operator (registered in custom_operations) so the
    # Custom op benchmark below can reference it via op_type="CustomAddOne"
    c = CustomAddOneProp()
    c.create_operator(ctx, [(1024, 1024)], [dtype])
custom_benchmark = run_performance_test([getattr(MX_OP_MODULE, "Custom")],
run_backward=True,
dtype=dtype,
ctx=ctx,
profiler=profiler,
inputs=inputs_custom,
warmup=warmup,
runs=runs)
# Fetch remaining Miscellaneous Operators
mx_misc_ops = get_remaining_miscellaneous_operators()
    # Run benchmarks for the remaining operators with their default inputs
mx_misc_op_results = run_op_benchmarks(mx_misc_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
return merge_map_list(array_ops_benchmark + add_n_benchmark + upsampling_benchmark + custom_benchmark + [mx_misc_op_results])
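

# Minimal usage sketch (an illustrative addition, not from the original module):
# runs the miscellaneous-operator benchmarks on CPU with default settings and
# prints the merged results. Assumes the benchmark.opperf package is importable,
# e.g. when this script is run from the MXNet repository root.
if __name__ == "__main__":
    misc_results = run_mx_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32')
    print(misc_results)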