# blob: 8fc009fe4652947fbbde10e307fd2584c9bd1782 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import csv
import os
import sys
import numpy as np
import mxnet as mx
# Make GPU 0 the default device for the tests collected from this module.
mx.test_utils.set_default_device(mx.gpu(0))
# Absolute directory containing this file; used to locate the sibling
# unittest directory.
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
# Put ../unittest at the front of the import search path so that the
# `test_profiler` import below resolves to the module in that directory.
sys.path.insert(0, os.path.join(curr_path, '../unittest'))
# We import all tests from ../unittest/test_profiler.py
# They will be detected by test framework, as long as the current file has a different filename
# NOTE(review): the names `pytest`, `profiler`, `enable_profiler` and `nn` used
# by the tests below are not imported explicitly in this file; presumably they
# arrive through this star import -- confirm against test_profiler.py.
from test_profiler import *
@pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18564')
def test_gpu_memory_profiler_symbolic():
    """Check the GPU memory profile recorded for a symbolic ``dot`` operator.

    The test builds ``C = dot(A, B)`` inside the ``tensordot`` profiler scope,
    fills the inputs with random data created inside the ``init`` scope, runs
    one forward and one backward pass, and dumps the profile.  It then reads
    ``gpu_memory_profile-pid_<pid>.csv`` from the current directory and
    asserts that

    * every expected ``(Attribute Name, Requested Size)`` pair is present, and
    * no row is attributed to ``<unk>:unknown`` or ``<unk>:``.

    Expected sizes are computed as ``4 * array.size`` (presumably 4-byte
    float32 elements -- confirm if the default dtype ever changes).
    """
    enable_profiler('test_profiler.json')
    profiler.set_state('run')

    with profiler.scope("tensordot"):
        A = mx.sym.Variable('A')
        B = mx.sym.Variable('B')
        C = mx.symbol.dot(A, B, name="dot")

    executor = C._simple_bind(mx.gpu(), 'write', A=(1024, 2048), B=(2048, 4096))

    with profiler.scope("init"):
        a = mx.random.uniform(-1.0, 1.0, shape=(1024, 2048))
        b = mx.random.uniform(-1.0, 1.0, shape=(2048, 4096))

    a.copyto(executor.arg_dict['A'])
    b.copyto(executor.arg_dict['B'])

    executor.forward()
    executor.backward()
    c = executor.outputs[0]
    # Block until all pending work has finished before stopping the profiler.
    mx.nd.waitall()

    profiler.set_state('stop')
    profiler.dump(True)

    # (Attribute Name, Requested Size) pairs that must appear in the profile.
    expected_alloc_entries = [
        ('tensordot:in_arg:A', str(4 * a.size)),
        ('tensordot:in_arg:B', str(4 * b.size)),
        ('tensordot:dot', str(4 * c.size)),
        ('tensordot:dot_backward', str(4 * a.size)),
        ('tensordot:dot_backward', str(4 * b.size)),
        ('init:_random_uniform', str(4 * a.size)),
        ('init:_random_uniform', str(4 * b.size)),
    ]

    # Sample gpu_memory_profile.csv:
    # "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
    # <unk>:_head_grad_0,16777216,0,16777216,0
    # init:_random_uniform,33554432,0,33554432,1
    # init:_random_uniform,8388608,0,8388608,1
    # resource:temp_space (sample_op.h +365),8,0,4096,0
    # symbol:arg_grad:unknown,8388608,0,8388608,0
    # symbol:arg_grad:unknown,33554432,0,33554432,0
    # tensordot:dot,16777216,0,16777216,0
    # tensordot:dot_backward,8388608,0,8388608,0
    # tensordot:dot_backward,33554432,0,33554432,0
    # tensordot:in_arg:A,8388608,0,8388608,0
    # tensordot:in_arg:B,33554432,0,33554432,0

    # Parse the profile exactly once.  Re-seeking the file underneath a live
    # DictReader would replay the header line as a data row, because the
    # reader has already cached its field names.
    with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r',
              newline='') as csv_file:
        rows = list(csv.DictReader(csv_file))

    # Echo the profile so that it shows up in the test log on failure.
    for row in rows:
        print(",".join(row.values()))

    recorded_entries = {(row['Attribute Name'], row['Requested Size'])
                        for row in rows}
    for attr_name, alloc_size in expected_alloc_entries:
        assert (attr_name, alloc_size) in recorded_entries, \
            f"Entry for (attr_name={attr_name}, alloc_size={alloc_size}) has not been found"

    # Make sure that there is no unknown allocation entry.
    unknown_names = ("<unk>:unknown", "<unk>:")
    assert not any(row['Attribute Name'] in unknown_names for row in rows), \
        "Unknown allocation entry has been encountered"
@pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18564')
def test_gpu_memory_profiler_gluon():
    """Check the GPU memory profile recorded for a hybridized Gluon model.

    A small ``HybridSequential`` network (Dense/Dropout/Dense/Dense/Activation)
    is initialized on the GPU, hybridized, and run through one recorded
    forward pass and one backward pass while the profiler is active.  The
    test then reads ``gpu_memory_profile-pid_<pid>.csv`` from the current
    directory and asserts that

    * each model parameter has an ``<profiler scope>in_arg:<param name>`` row
      whose requested size equals ``4 * prod(param.shape)``, and
    * no row is attributed to ``<unk>:unknown`` or ``<unk>:``.

    The factor 4 is presumably the byte width of float32 parameters --
    confirm if the default dtype ever changes.
    """
    enable_profiler(profile_filename='test_profiler.json')
    profiler.set_state('run')

    model = nn.HybridSequential()
    model.add(nn.Dense(128, activation='tanh'))
    model.add(nn.Dropout(0.5))
    model.add(nn.Dense(64, activation='tanh'),
              nn.Dense(32, in_units=64))
    model.add(nn.Activation('relu'))
    model.initialize(device=mx.gpu())
    model.hybridize()

    with mx.autograd.record():
        out = model(mx.np.zeros((16, 10), device=mx.gpu()))
    out.backward()
    # Block until all pending work has finished before stopping the profiler.
    mx.npx.waitall()
    profiler.set_state('stop')
    profiler.dump(True)

    # Sample gpu_memory_profile.csv:
    # "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
    # <unk>:in_arg:data,640,0,4096,0
    # hybridsequential:activation0:hybridsequential_activation0_fwd,2048,0,4096,0
    # hybridsequential:activation0:hybridsequential_activation0_fwd_backward,8192,0,8192,0
    # hybridsequential:activation0:hybridsequential_activation0_fwd_head_grad,2048,0,4096,0
    # hybridsequential:dense0:activation0:hybridsequential_dense0_activation0_fwd,8192,0,8192,0
    # hybridsequential:dense0:arg_grad:bias,512,0,4096,0
    # hybridsequential:dense0:arg_grad:weight,5120,0,8192,0
    # hybridsequential:dense0:hybridsequential_dense0_fwd,8192,0,8192,0
    # hybridsequential:dense0:in_arg:bias,512,0,4096,0
    # hybridsequential:dense0:in_arg:weight,5120,0,8192,0
    # hybridsequential:dense1:activation0:hybridsequential_dense1_activation0_fwd,4096,0,4096,0
    # hybridsequential:dense1:arg_grad:bias,256,0,4096,0
    # hybridsequential:dense1:arg_grad:weight,32768,0,32768,0
    # hybridsequential:dense1:hybridsequential_dense1_fwd,4096,0,4096,0
    # hybridsequential:dense1:in_arg:bias,256,0,4096,0
    # hybridsequential:dense1:in_arg:weight,32768,0,32768,0
    # hybridsequential:dense2:arg_grad:bias,128,0,4096,0
    # hybridsequential:dense2:arg_grad:weight,8192,0,8192,0
    # hybridsequential:dense2:hybridsequential_dense2_fwd_backward,4096,0,4096,1
    # hybridsequential:dense2:in_arg:bias,128,0,4096,0
    # hybridsequential:dense2:in_arg:weight,8192,0,8192,0
    # hybridsequential:dropout0:hybridsequential_dropout0_fwd,8192,0,8192,0
    # hybridsequential:dropout0:hybridsequential_dropout0_fwd,8192,0,8192,0
    # resource:cudnn_dropout_state (dropout-inl.h +256),1474560,0,1474560,0
    # resource:temp_space (fully_connected-inl.h +316),15360,0,16384,0

    # We are only checking for weight parameters here, also making sure that
    # there is no unknown entries in the memory profile.

    # Parse the profile exactly once.  Re-seeking the file underneath a live
    # DictReader would replay the header line as a data row, because the
    # reader has already cached its field names.
    with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r',
              newline='') as csv_file:
        rows = list(csv.DictReader(csv_file))

    # Echo the profile so that it shows up in the test log on failure.
    for row in rows:
        print(",".join(row.values()))

    recorded_entries = {(row['Attribute Name'], row['Requested Size'])
                        for row in rows}
    for param in model.collect_params().values():
        expected_arg_name = \
            f"{param.var().attr('__profiler_scope__')}in_arg:" + param.name
        expected_arg_size = str(4 * np.prod(param.shape))
        assert (expected_arg_name, expected_arg_size) in recorded_entries, \
            f"Entry for (attr_name={expected_arg_name}, alloc_size={expected_arg_size}) has not been found"

    # Make sure that there is no unknown allocation entry.
    unknown_names = ("<unk>:unknown", "<unk>:")
    assert not any(row['Attribute Name'] in unknown_names for row in rows), \
        "Unknown allocation entry has been encountered"