# blob: 5b3d92bc35bb42c07e1381f39960a3f572c51919 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import ctypes
from mxnet.test_utils import *
import os
import time
import argparse
from mxnet.base import check_call, _LIB
# Command-line interface of the benchmark script. The parsed namespace is
# kept in the module-level ``args`` and read later when the benchmark runs.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="Benchmark cast storage operators",
)
parser.add_argument(
    '--num-omp-threads',
    default=1,
    type=int,
    help='number of omp threads to set in MXNet',
)
args = parser.parse_args()
def measure_cost(repeat, f, *args, **kwargs):
    """Return the average elapsed time, in seconds, of one call to ``f``.

    ``f(*args, **kwargs)`` is called ``repeat`` times. After every call,
    ``wait_to_read()`` is invoked on the returned object before the next
    call starts (presumably so that asynchronous work has finished before
    the clock is read -- confirm against the MXNet engine semantics).

    Parameters
    ----------
    repeat : int
        Number of timed calls; must be non-zero.
    f : callable
        Function under test; its return value must provide ``wait_to_read()``.
    *args, **kwargs
        Forwarded unchanged to ``f`` on every call.

    Returns
    -------
    float
        Total elapsed time divided by ``repeat``.

    Raises
    ------
    ZeroDivisionError
        If ``repeat`` is 0.
    """
    # perf_counter is monotonic and high resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    for _ in range(repeat):
        f(*args, **kwargs).wait_to_read()
    elapsed = time.perf_counter() - start
    return elapsed / repeat
def run_cast_storage_synthetic():
    """Benchmark ``mx.nd.cast_storage`` (dense -> sparse) on synthetic data.

    For every benchmark (dense to csr, dense to row_sparse), every (m, n)
    matrix size, every context and every density, a random matrix is
    generated, one warm-up conversion is compared against the dense input,
    and the average conversion time over ``num_repeat`` runs is printed as
    one row of a table.

    The number of OMP threads is set from the module-level
    ``args.num_omp_threads`` before any benchmark runs.
    """

    def dense_to_sparse(m, n, density, ctx, repeat, stype):
        """Time casting one dense (m, n) matrix to ``stype`` and print a row.

        Parameters
        ----------
        m, n : int
            Number of rows / columns of the generated matrix.
        density : float
            Density passed to ``rand_ndarray``; printed as a percentage.
        ctx
            Context made the default device for this measurement.
        repeat : int
            Number of timed conversions to average over.
        stype : str
            Target storage type handed to ``mx.nd.cast_storage``.
        """
        set_default_device(ctx)
        data_shape = (m, n)
        dns_data = rand_ndarray(data_shape, stype, density).tostype('default')
        dns_data.wait_to_read()

        # do one warm up run, verify correctness
        # The conversion is kept outside the assert so the warm-up still
        # happens when assertions are stripped (python -O).
        warm_up = mx.nd.cast_storage(dns_data, stype).asnumpy()
        assert same(warm_up, dns_data.asnumpy())

        # start benchmarking
        cost = measure_cost(repeat, mx.nd.cast_storage, dns_data, stype)
        results = f'{density*100:10.1f} {str(ctx):>10} {m:8d} {n:8d} {cost * 1000:10.2f}'
        print(results)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads)))

    # params
    # m           number of rows
    # n           number of columns
    # density     density of the matrix
    # num_repeat  number of benchmark runs to average over
    # contexts    mx.cpu(), mx.gpu()
    #             note: benchmark different contexts separately; to benchmark cpu, compile without CUDA
    # benchmarks  dns_to_csr, dns_to_rsp
    m = [  512,    512]
    n = [50000, 100000]
    density = [1.00, 0.80, 0.60, 0.40, 0.20, 0.10, 0.05, 0.02, 0.01]
    num_repeat = 10
    contexts = [mx.gpu()]
    benchmarks = ["dns_to_csr", "dns_to_rsp"]

    # run benchmark
    for b in benchmarks:
        stype = ''
        print("==================================================")
        # Compare by value: identity comparison against a string literal
        # only works by accident of interning.
        if b == "dns_to_csr":
            stype = 'csr'
            print(" cast_storage benchmark: dense to csr, size m x n ")
        elif b == "dns_to_rsp":
            stype = 'row_sparse'
            print(" cast_storage benchmark: dense to rsp, size m x n ")
        else:
            print(f"invalid benchmark: {b}")
            continue
        print("==================================================")
        headline = f"{'density(%)':>10} {'context':>10} {'m':>8} {'n':>8} {'time(ms)':>10}"
        print(headline)
        # m and n are parallel lists: entry i of each describes one matrix size.
        for num_rows, num_cols in zip(m, n):
            for ctx in contexts:
                for den in density:
                    dense_to_sparse(num_rows, num_cols, den, ctx, num_repeat, stype)
            print("")
        print("")
# Script entry point: run the synthetic benchmark only when this file is
# executed directly.
if __name__ == "__main__":
    run_cast_storage_synthetic()