tests/python/gpu/test_profiler_gpu.py - mxnet - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 import csv
 import os
 import sys

 import numpy as np
 import mxnet as mx
 # Use GPU 0 as the default device for everything that runs in this module.
 mx.test_utils.set_default_device(mx.gpu(0))

 # Put the sibling ``../unittest`` directory first on the import path so that
 # ``test_profiler`` resolves to the CPU test module living there.
 curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
 sys.path.insert(0, os.path.join(curr_path, '../unittest'))
 # We import all tests from ../unittest/test_profiler.py
 # They will be detected by test framework, as long as the current file has a different filename
 # NOTE(review): the tests below use ``pytest``, ``profiler``, ``enable_profiler``
 # and ``nn`` without importing them here; presumably they arrive through this
 # star import -- confirm against test_profiler.py.
 from test_profiler import *

 @pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18564')
 def test_gpu_memory_profiler_symbolic():
     """Check GPU memory-profile attribution for a symbolic ``dot`` executor.

     Builds ``dot(A, B)`` inside the ``tensordot`` profiler scope, creates the
     random inputs inside the ``init`` scope, runs forward and backward on the
     GPU, and dumps the profile.  The test then reads
     ``gpu_memory_profile-pid_<pid>.csv`` (expected to have been written by the
     profiler dump) and asserts that

     * every expected ``(Attribute Name, Requested Size)`` pair is present, and
     * no row is attributed to ``<unk>:unknown`` or ``<unk>:``.
     """
     enable_profiler('test_profiler.json')
     profiler.set_state('run')

     with profiler.scope("tensordot"):
         A = mx.sym.Variable('A')
         B = mx.sym.Variable('B')
         C = mx.symbol.dot(A, B, name="dot")

     executor = C._simple_bind(mx.gpu(), 'write', A=(1024, 2048), B=(2048, 4096))

     with profiler.scope("init"):
         a = mx.random.uniform(-1.0, 1.0, shape=(1024, 2048))
         b = mx.random.uniform(-1.0, 1.0, shape=(2048, 4096))

     a.copyto(executor.arg_dict['A'])
     b.copyto(executor.arg_dict['B'])

     executor.forward()
     executor.backward()
     c = executor.outputs[0]
     # Block until all queued work has finished before stopping the profiler.
     mx.nd.waitall()
     profiler.set_state('stop')
     profiler.dump(True)

     # (attribute name, requested size in bytes as it appears in the CSV).
     # Sizes are 4 bytes per element (presumably the default float32 dtype);
     # the backward pass is expected to allocate one buffer shaped like each input.
     expected_alloc_entries = [
         ('tensordot:in_arg:A', str(4 * a.size)),
         ('tensordot:in_arg:B', str(4 * b.size)),
         ('tensordot:dot', str(4 * c.size)),
         ('tensordot:dot_backward', str(4 * a.size)),
         ('tensordot:dot_backward', str(4 * b.size)),
         ('init:_random_uniform', str(4 * a.size)),
         ('init:_random_uniform', str(4 * b.size)),
     ]

     # Sample gpu_memory_profile.csv:
     # "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
     # <unk>:_head_grad_0,16777216,0,16777216,0
     # init:_random_uniform,33554432,0,33554432,1
     # init:_random_uniform,8388608,0,8388608,1
     # resource:temp_space (sample_op.h +365),8,0,4096,0
     # symbol:arg_grad:unknown,8388608,0,8388608,0
     # symbol:arg_grad:unknown,33554432,0,33554432,0
     # tensordot:dot,16777216,0,16777216,0
     # tensordot:dot_backward,8388608,0,8388608,0
     # tensordot:dot_backward,33554432,0,33554432,0
     # tensordot:in_arg:A,8388608,0,8388608,0
     # tensordot:in_arg:B,33554432,0,33554432,0

     # Parse the profile once.  Rewinding the file under a live DictReader would
     # re-parse it for every lookup and hand the header line back as a data row.
     with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r', newline='') as csv_file:
         rows = list(csv.DictReader(csv_file))

     # Echo the profile so that a failing run shows what was actually recorded.
     for row in rows:
         print(",".join(row.values()))

     recorded = {(row['Attribute Name'], row['Requested Size']) for row in rows}
     for attr_name, alloc_size in expected_alloc_entries:
         assert (attr_name, alloc_size) in recorded, \
             "Entry for (attr_name={}, alloc_size={}) has not been found" \
             .format(attr_name, alloc_size)

     # Make sure that there is no unknown allocation entry.
     unknown_names = ("<unk>:unknown", "<unk>:")
     assert not any(row['Attribute Name'] in unknown_names for row in rows), \
         "Unknown allocation entry has been encountered"


 @pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18564')
 def test_gpu_memory_profiler_gluon():
     """Check GPU memory-profile attribution for a hybridized Gluon model.

     Runs one recorded forward pass and one backward pass of a small
     ``HybridSequential`` network on the GPU with the profiler running, dumps
     the profile, and reads ``gpu_memory_profile-pid_<pid>.csv`` (expected to
     have been written by the profiler dump).  The test asserts that

     * each model parameter has an ``in_arg`` row with the expected requested
       size, and
     * no row is attributed to ``<unk>:unknown`` or ``<unk>:``.
     """
     enable_profiler(profile_filename='test_profiler.json')
     profiler.set_state('run')

     model = nn.HybridSequential()
     model.add(nn.Dense(128, activation='tanh'))
     model.add(nn.Dropout(0.5))
     model.add(nn.Dense(64, activation='tanh'),
               nn.Dense(32, in_units=64))
     model.add(nn.Activation('relu'))
     model.initialize(device=mx.gpu())
     model.hybridize()

     with mx.autograd.record():
         out = model(mx.np.zeros((16, 10), device=mx.gpu()))
     out.backward()
     # Block until all queued work has finished before stopping the profiler.
     mx.npx.waitall()
     profiler.set_state('stop')
     profiler.dump(True)

     # Sample gpu_memory_profile.csv:
     # "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
     # <unk>:in_arg:data,640,0,4096,0
     # hybridsequential:activation0:hybridsequential_activation0_fwd,2048,0,4096,0
     # hybridsequential:activation0:hybridsequential_activation0_fwd_backward,8192,0,8192,0
     # hybridsequential:activation0:hybridsequential_activation0_fwd_head_grad,2048,0,4096,0
     # hybridsequential:dense0:activation0:hybridsequential_dense0_activation0_fwd,8192,0,8192,0
     # hybridsequential:dense0:arg_grad:bias,512,0,4096,0
     # hybridsequential:dense0:arg_grad:weight,5120,0,8192,0
     # hybridsequential:dense0:hybridsequential_dense0_fwd,8192,0,8192,0
     # hybridsequential:dense0:in_arg:bias,512,0,4096,0
     # hybridsequential:dense0:in_arg:weight,5120,0,8192,0
     # hybridsequential:dense1:activation0:hybridsequential_dense1_activation0_fwd,4096,0,4096,0
     # hybridsequential:dense1:arg_grad:bias,256,0,4096,0
     # hybridsequential:dense1:arg_grad:weight,32768,0,32768,0
     # hybridsequential:dense1:hybridsequential_dense1_fwd,4096,0,4096,0
     # hybridsequential:dense1:in_arg:bias,256,0,4096,0
     # hybridsequential:dense1:in_arg:weight,32768,0,32768,0
     # hybridsequential:dense2:arg_grad:bias,128,0,4096,0
     # hybridsequential:dense2:arg_grad:weight,8192,0,8192,0
     # hybridsequential:dense2:hybridsequential_dense2_fwd_backward,4096,0,4096,1
     # hybridsequential:dense2:in_arg:bias,128,0,4096,0
     # hybridsequential:dense2:in_arg:weight,8192,0,8192,0
     # hybridsequential:dropout0:hybridsequential_dropout0_fwd,8192,0,8192,0
     # hybridsequential:dropout0:hybridsequential_dropout0_fwd,8192,0,8192,0
     # resource:cudnn_dropout_state (dropout-inl.h +256),1474560,0,1474560,0
     # resource:temp_space (fully_connected-inl.h +316),15360,0,16384,0

     # Parse the profile once.  Rewinding the file under a live DictReader would
     # re-parse it for every lookup and hand the header line back as a data row.
     with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r', newline='') as csv_file:
         rows = list(csv.DictReader(csv_file))

     # Echo the profile so that a failing run shows what was actually recorded.
     for row in rows:
         print(",".join(row.values()))

     recorded = {(row['Attribute Name'], row['Requested Size']) for row in rows}

     # Only the ``in_arg`` entry of each model parameter is checked here; the
     # remaining rows are covered solely by the "no unknown entry" check below.
     for param in model.collect_params().values():
         expected_arg_name = f"{param.var().attr('__profiler_scope__')}in_arg:" + \
                             param.name
         # 4 bytes per element (presumably the default float32 dtype).
         expected_arg_size = str(4 * np.prod(param.shape))
         assert (expected_arg_name, expected_arg_size) in recorded, \
             "Entry for (attr_name={}, alloc_size={}) has not been found" \
             .format(expected_arg_name, expected_arg_size)

     # Make sure that there is no unknown allocation entry.
     unknown_names = ("<unk>:unknown", "<unk>:")
     assert not any(row['Attribute Name'] in unknown_names for row in rows), \
         "Unknown allocation entry has been encountered"
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	import csv
	import os
	import sys

	import numpy as np
	import mxnet as mx
	# Use GPU 0 as the default device for everything that runs in this module.
	mx.test_utils.set_default_device(mx.gpu(0))

	# Put the sibling ``../unittest`` directory first on the import path so that
	# ``test_profiler`` resolves to the CPU test module living there.
	curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
	sys.path.insert(0, os.path.join(curr_path, '../unittest'))
	# We import all tests from ../unittest/test_profiler.py
	# They will be detected by test framework, as long as the current file has a different filename
	# NOTE(review): the tests below use ``pytest``, ``profiler``, ``enable_profiler``
	# and ``nn`` without importing them here; presumably they arrive through this
	# star import -- confirm against test_profiler.py.
	from test_profiler import *

	@pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18564')
	def test_gpu_memory_profiler_symbolic():
	    """Check GPU memory-profile attribution for a symbolic ``dot`` executor.

	    Builds ``dot(A, B)`` inside the ``tensordot`` profiler scope, creates the
	    random inputs inside the ``init`` scope, runs forward and backward on the
	    GPU, and dumps the profile.  The test then reads
	    ``gpu_memory_profile-pid_<pid>.csv`` (expected to have been written by the
	    profiler dump) and asserts that

	    * every expected ``(Attribute Name, Requested Size)`` pair is present, and
	    * no row is attributed to ``<unk>:unknown`` or ``<unk>:``.
	    """
	    enable_profiler('test_profiler.json')
	    profiler.set_state('run')

	    with profiler.scope("tensordot"):
	        A = mx.sym.Variable('A')
	        B = mx.sym.Variable('B')
	        C = mx.symbol.dot(A, B, name="dot")

	    executor = C._simple_bind(mx.gpu(), 'write', A=(1024, 2048), B=(2048, 4096))

	    with profiler.scope("init"):
	        a = mx.random.uniform(-1.0, 1.0, shape=(1024, 2048))
	        b = mx.random.uniform(-1.0, 1.0, shape=(2048, 4096))

	    a.copyto(executor.arg_dict['A'])
	    b.copyto(executor.arg_dict['B'])

	    executor.forward()
	    executor.backward()
	    c = executor.outputs[0]
	    # Block until all queued work has finished before stopping the profiler.
	    mx.nd.waitall()
	    profiler.set_state('stop')
	    profiler.dump(True)

	    # (attribute name, requested size in bytes as it appears in the CSV).
	    # Sizes are 4 bytes per element (presumably the default float32 dtype);
	    # the backward pass is expected to allocate one buffer shaped like each input.
	    expected_alloc_entries = [
	        ('tensordot:in_arg:A', str(4 * a.size)),
	        ('tensordot:in_arg:B', str(4 * b.size)),
	        ('tensordot:dot', str(4 * c.size)),
	        ('tensordot:dot_backward', str(4 * a.size)),
	        ('tensordot:dot_backward', str(4 * b.size)),
	        ('init:_random_uniform', str(4 * a.size)),
	        ('init:_random_uniform', str(4 * b.size)),
	    ]

	    # Sample gpu_memory_profile.csv:
	    # "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
	    # <unk>:_head_grad_0,16777216,0,16777216,0
	    # init:_random_uniform,33554432,0,33554432,1
	    # init:_random_uniform,8388608,0,8388608,1
	    # resource:temp_space (sample_op.h +365),8,0,4096,0
	    # symbol:arg_grad:unknown,8388608,0,8388608,0
	    # symbol:arg_grad:unknown,33554432,0,33554432,0
	    # tensordot:dot,16777216,0,16777216,0
	    # tensordot:dot_backward,8388608,0,8388608,0
	    # tensordot:dot_backward,33554432,0,33554432,0
	    # tensordot:in_arg:A,8388608,0,8388608,0
	    # tensordot:in_arg:B,33554432,0,33554432,0

	    # Parse the profile once.  Rewinding the file under a live DictReader would
	    # re-parse it for every lookup and hand the header line back as a data row.
	    with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r', newline='') as csv_file:
	        rows = list(csv.DictReader(csv_file))

	    # Echo the profile so that a failing run shows what was actually recorded.
	    for row in rows:
	        print(",".join(row.values()))

	    recorded = {(row['Attribute Name'], row['Requested Size']) for row in rows}
	    for attr_name, alloc_size in expected_alloc_entries:
	        assert (attr_name, alloc_size) in recorded, \
	            "Entry for (attr_name={}, alloc_size={}) has not been found" \
	            .format(attr_name, alloc_size)

	    # Make sure that there is no unknown allocation entry.
	    unknown_names = ("<unk>:unknown", "<unk>:")
	    assert not any(row['Attribute Name'] in unknown_names for row in rows), \
	        "Unknown allocation entry has been encountered"


	@pytest.mark.skip(reason='https://github.com/apache/incubator-mxnet/issues/18564')
	def test_gpu_memory_profiler_gluon():
	    """Check GPU memory-profile attribution for a hybridized Gluon model.

	    Runs one recorded forward pass and one backward pass of a small
	    ``HybridSequential`` network on the GPU with the profiler running, dumps
	    the profile, and reads ``gpu_memory_profile-pid_<pid>.csv`` (expected to
	    have been written by the profiler dump).  The test asserts that

	    * each model parameter has an ``in_arg`` row with the expected requested
	      size, and
	    * no row is attributed to ``<unk>:unknown`` or ``<unk>:``.
	    """
	    enable_profiler(profile_filename='test_profiler.json')
	    profiler.set_state('run')

	    model = nn.HybridSequential()
	    model.add(nn.Dense(128, activation='tanh'))
	    model.add(nn.Dropout(0.5))
	    model.add(nn.Dense(64, activation='tanh'),
	              nn.Dense(32, in_units=64))
	    model.add(nn.Activation('relu'))
	    model.initialize(device=mx.gpu())
	    model.hybridize()

	    with mx.autograd.record():
	        out = model(mx.np.zeros((16, 10), device=mx.gpu()))
	    out.backward()
	    # Block until all queued work has finished before stopping the profiler.
	    mx.npx.waitall()
	    profiler.set_state('stop')
	    profiler.dump(True)

	    # Sample gpu_memory_profile.csv:
	    # "Attribute Name","Requested Size","Device","Actual Size","Reuse?"
	    # <unk>:in_arg:data,640,0,4096,0
	    # hybridsequential:activation0:hybridsequential_activation0_fwd,2048,0,4096,0
	    # hybridsequential:activation0:hybridsequential_activation0_fwd_backward,8192,0,8192,0
	    # hybridsequential:activation0:hybridsequential_activation0_fwd_head_grad,2048,0,4096,0
	    # hybridsequential:dense0:activation0:hybridsequential_dense0_activation0_fwd,8192,0,8192,0
	    # hybridsequential:dense0:arg_grad:bias,512,0,4096,0
	    # hybridsequential:dense0:arg_grad:weight,5120,0,8192,0
	    # hybridsequential:dense0:hybridsequential_dense0_fwd,8192,0,8192,0
	    # hybridsequential:dense0:in_arg:bias,512,0,4096,0
	    # hybridsequential:dense0:in_arg:weight,5120,0,8192,0
	    # hybridsequential:dense1:activation0:hybridsequential_dense1_activation0_fwd,4096,0,4096,0
	    # hybridsequential:dense1:arg_grad:bias,256,0,4096,0
	    # hybridsequential:dense1:arg_grad:weight,32768,0,32768,0
	    # hybridsequential:dense1:hybridsequential_dense1_fwd,4096,0,4096,0
	    # hybridsequential:dense1:in_arg:bias,256,0,4096,0
	    # hybridsequential:dense1:in_arg:weight,32768,0,32768,0
	    # hybridsequential:dense2:arg_grad:bias,128,0,4096,0
	    # hybridsequential:dense2:arg_grad:weight,8192,0,8192,0
	    # hybridsequential:dense2:hybridsequential_dense2_fwd_backward,4096,0,4096,1
	    # hybridsequential:dense2:in_arg:bias,128,0,4096,0
	    # hybridsequential:dense2:in_arg:weight,8192,0,8192,0
	    # hybridsequential:dropout0:hybridsequential_dropout0_fwd,8192,0,8192,0
	    # hybridsequential:dropout0:hybridsequential_dropout0_fwd,8192,0,8192,0
	    # resource:cudnn_dropout_state (dropout-inl.h +256),1474560,0,1474560,0
	    # resource:temp_space (fully_connected-inl.h +316),15360,0,16384,0

	    # Parse the profile once.  Rewinding the file under a live DictReader would
	    # re-parse it for every lookup and hand the header line back as a data row.
	    with open(f'gpu_memory_profile-pid_{os.getpid()}.csv', mode='r', newline='') as csv_file:
	        rows = list(csv.DictReader(csv_file))

	    # Echo the profile so that a failing run shows what was actually recorded.
	    for row in rows:
	        print(",".join(row.values()))

	    recorded = {(row['Attribute Name'], row['Requested Size']) for row in rows}

	    # Only the ``in_arg`` entry of each model parameter is checked here; the
	    # remaining rows are covered solely by the "no unknown entry" check below.
	    for param in model.collect_params().values():
	        expected_arg_name = f"{param.var().attr('__profiler_scope__')}in_arg:" + \
	                            param.name
	        # 4 bytes per element (presumably the default float32 dtype).
	        expected_arg_size = str(4 * np.prod(param.shape))
	        assert (expected_arg_name, expected_arg_size) in recorded, \
	            "Entry for (attr_name={}, alloc_size={}) has not been found" \
	            .format(expected_arg_name, expected_arg_size)

	    # Make sure that there is no unknown allocation entry.
	    unknown_names = ("<unk>:unknown", "<unk>:")
	    assert not any(row['Attribute Name'] in unknown_names for row in rows), \
	        "Unknown allocation entry has been encountered"