tests/python/runtime/test_runtime_graph_cuda_graph.py - tvm - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 import json
 import os
 import re
 import sys
 import time

 import pytest

 import tvm
 import tvm.testing
 from tvm import te
 import numpy as np

 from tvm.contrib import utils, graph_executor
 from tvm.contrib.cuda_graph import cuda_graph_executor


 bx = te.thread_axis("blockIdx.x")
 tx = te.thread_axis("threadIdx.x")


 @tvm.testing.requires_cudagraph
 def test_graph_simple():
     n = 32
     A = te.placeholder((n,), name="A")
     B = te.compute(A.shape, lambda *i: A(*i) + 1.0, name="B")
     s = te.create_schedule(B.op)
     xo, xi = s[B].split(B.op.axis[0], factor=8)
     s[B].bind(xo, bx)
     s[B].bind(xi, tx)

     node0 = {"op": "null", "name": "x", "inputs": []}
     node1 = {
         "op": "tvm_op",
         "name": "add",
         "inputs": [[0, 0, 0]],
         "attrs": {"func_name": "myadd", "flatten_data": "1", "num_inputs": "1", "num_outputs": "1"},
     }
     nodes = [node0, node1]
     arg_nodes = [0]
     node_row_ptr = [0, 1, 2]
     outputs = [[1, 0, 0]]
     shape = (n,)
     attrs = {
         "shape": ["list_shape", [shape, shape]],
         "dltype": ["list_str", ["float32", "float32"]],
         "storage_id": ["list_int", [0, 1]],
     }
     graph = {
         "nodes": nodes,
         "arg_nodes": arg_nodes,
         "node_row_ptr": node_row_ptr,
         "heads": outputs,
         "attrs": attrs,
     }
     graph = json.dumps(graph)

     def check_verify():
         mlib = tvm.build(s, [A, B], "cuda", name="myadd")
         dev = tvm.cuda(0)
         try:
             mod = cuda_graph_executor.create(graph, mlib, dev)
         except ValueError:
             return

         for i in range(3):
             a = np.random.uniform(size=(n,)).astype(A.dtype)
             mod.run(x=a)  # The first run captured a CUDA graph
             out = mod.get_output(0, tvm.nd.empty((n,)))
             np.testing.assert_equal(out.numpy(), a + 1)

         # capture / run CUDA graph manually
         mod.capture_cuda_graph()
         a = np.random.uniform(size=(n,)).astype(A.dtype)
         mod.set_input(x=a)
         mod.run_cuda_graph()
         out = mod.get_output(0, tvm.nd.empty((n,)))
         np.testing.assert_equal(out.numpy(), a + 1)

     check_verify()


 if __name__ == "__main__":
     test_graph_simple()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	import json
	import os
	import re
	import sys
	import time

	import pytest

	import tvm
	import tvm.testing
	from tvm import te
	import numpy as np

	from tvm.contrib import utils, graph_executor
	from tvm.contrib.cuda_graph import cuda_graph_executor


	bx = te.thread_axis("blockIdx.x")
	tx = te.thread_axis("threadIdx.x")


	@tvm.testing.requires_cudagraph
	def test_graph_simple():
	n = 32
	A = te.placeholder((n,), name="A")
	B = te.compute(A.shape, lambda i: A(i) + 1.0, name="B")
	s = te.create_schedule(B.op)
	xo, xi = s[B].split(B.op.axis[0], factor=8)
	s[B].bind(xo, bx)
	s[B].bind(xi, tx)

	node0 = {"op": "null", "name": "x", "inputs": []}
	node1 = {
	"op": "tvm_op",
	"name": "add",
	"inputs": [[0, 0, 0]],
	"attrs": {"func_name": "myadd", "flatten_data": "1", "num_inputs": "1", "num_outputs": "1"},
	}
	nodes = [node0, node1]
	arg_nodes = [0]
	node_row_ptr = [0, 1, 2]
	outputs = [[1, 0, 0]]
	shape = (n,)
	attrs = {
	"shape": ["list_shape", [shape, shape]],
	"dltype": ["list_str", ["float32", "float32"]],
	"storage_id": ["list_int", [0, 1]],
	}
	graph = {
	"nodes": nodes,
	"arg_nodes": arg_nodes,
	"node_row_ptr": node_row_ptr,
	"heads": outputs,
	"attrs": attrs,
	}
	graph = json.dumps(graph)

	def check_verify():
	mlib = tvm.build(s, [A, B], "cuda", name="myadd")
	dev = tvm.cuda(0)
	try:
	mod = cuda_graph_executor.create(graph, mlib, dev)
	except ValueError:
	return

	for i in range(3):
	a = np.random.uniform(size=(n,)).astype(A.dtype)
	mod.run(x=a) # The first run captured a CUDA graph
	out = mod.get_output(0, tvm.nd.empty((n,)))
	np.testing.assert_equal(out.numpy(), a + 1)

	# capture / run CUDA graph manually
	mod.capture_cuda_graph()
	a = np.random.uniform(size=(n,)).astype(A.dtype)
	mod.set_input(x=a)
	mod.run_cuda_graph()
	out = mod.get_output(0, tvm.nd.empty((n,)))
	np.testing.assert_equal(out.numpy(), a + 1)

	check_verify()


	if __name__ == "__main__":
	test_graph_simple()