| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| import ctypes |
| |
| from mxnet.test_utils import * |
| import os |
| import time |
| import argparse |
| |
| from mxnet.base import check_call, _LIB |
| |
# Command-line interface of the benchmark script. The arguments are parsed
# from the process command line as soon as this module is executed, and the
# resulting namespace is kept in the module-level name ``args``.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    description="Benchmark cast storage operators",
)
parser.add_argument(
    '--num-omp-threads',
    default=1,
    type=int,
    help='number of omp threads to set in MXNet',
)
args = parser.parse_args()
| |
def measure_cost(repeat, f, *args, **kwargs):
    """Return the average time, in seconds, of one call to ``f``.

    ``f(*args, **kwargs)`` is invoked ``repeat`` times back to back, and
    ``wait_to_read()`` is called on every returned object before the next
    invocation, so that call is part of the measured interval.

    Parameters
    ----------
    repeat : int
        Number of timed invocations; must be at least 1.
    f : callable
        Function under test. Its return value must provide a
        ``wait_to_read()`` method.
    *args, **kwargs
        Forwarded unchanged to ``f`` on every invocation.

    Returns
    -------
    float
        Total elapsed seconds divided by ``repeat``.

    Raises
    ------
    ValueError
        If ``repeat`` is smaller than 1 (an average over zero runs is
        undefined).
    """
    if repeat < 1:
        raise ValueError(f"repeat must be a positive integer, got {repeat}")

    # perf_counter is monotonic and high resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    start = time.perf_counter()
    for _ in range(repeat):
        f(*args, **kwargs).wait_to_read()
    elapsed = time.perf_counter() - start
    return elapsed / repeat
| |
| |
def run_cast_storage_synthetic():
    """Benchmark ``mx.nd.cast_storage`` (dense -> sparse) on synthetic data.

    For each benchmark (dense to csr, dense to row_sparse), each matrix
    shape, each context and each density, a random matrix is generated, one
    verification cast is performed, and the average time of ``num_repeat``
    casts is printed in milliseconds as one row of a table on stdout.

    The number of OpenMP threads is taken from the module-level ``args``
    (``--num-omp-threads``) and applied before any measurement is made.
    """
    def dense_to_sparse(m, n, density, ctx, repeat, stype):
        """Time casting one dense ``m`` x ``n`` array to ``stype`` and print a row."""
        set_default_device(ctx)
        data_shape = (m, n)
        # Build the input via rand_ndarray with the target stype and the
        # requested density, then convert it to 'default' storage so the
        # cast under test starts from a dense array.
        dns_data = rand_ndarray(data_shape, stype, density).tostype('default')
        dns_data.wait_to_read()

        # do one warm up run, verify correctness
        assert same(mx.nd.cast_storage(dns_data, stype).asnumpy(), dns_data.asnumpy())

        # start benchmarking
        cost = measure_cost(repeat, mx.nd.cast_storage, dns_data, stype)
        results = f'{density*100:10.1f} {str(ctx):>10} {m:8d} {n:8d} {cost * 1000:10.2f}'
        print(results)

    check_call(_LIB.MXSetNumOMPThreads(ctypes.c_int(args.num_omp_threads)))

    # params
    # m           number of rows
    # n           number of columns
    # density     density of the matrix
    # num_repeat  number of benchmark runs to average over
    # contexts    mx.cpu(), mx.gpu()
    #             note: benchmark different contexts separately; to benchmark cpu, compile without CUDA
    # benchmarks  dns_to_csr, dns_to_rsp
    m = [512, 512]
    n = [50000, 100000]
    density = [1.00, 0.80, 0.60, 0.40, 0.20, 0.10, 0.05, 0.02, 0.01]
    num_repeat = 10
    contexts = [mx.gpu()]
    benchmarks = ["dns_to_csr", "dns_to_rsp"]

    # run benchmark
    for b in benchmarks:
        print("==================================================")
        # Compare by value: identity comparison against a string literal
        # only works by accident of interning and warns on Python 3.8+.
        if b == "dns_to_csr":
            stype = 'csr'
            print(" cast_storage benchmark: dense to csr, size m x n ")
        elif b == "dns_to_rsp":
            stype = 'row_sparse'
            print(" cast_storage benchmark: dense to rsp, size m x n ")
        else:
            print(f"invalid benchmark: {b}")
            continue
        print("==================================================")
        headline = f"{'density(%)':>10} {'context':>10} {'m':>8} {'n':>8} {'time(ms)':>10}"
        print(headline)
        # m and n are parallel lists: the i-th entries form one matrix shape.
        for rows, cols in zip(m, n):
            for ctx in contexts:
                for den in density:
                    dense_to_sparse(rows, cols, den, ctx, num_repeat, stype)
            print("")
        print("")
| |
| |
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    run_cast_storage_synthetic()