blob: 6c23368f840a92a200c2af6320f6053034d0eca1 [file] [log] [blame]
"""
.. _tutorial-cross-compilation-and-rpc:
Cross Compilation and RPC
=========================
**Author**: `Ziheng Jiang <https://github.com/ZihengJiang/>`_, `Lianmin Zheng <https://github.com/merrymercy/>`_
This tutorial introduces cross compilation and remote device
execution with RPC in TVM.
With cross compilation and RPC, you can **compile program on your
local machine then run it on the remote device**. It is useful when
the resource of remote devices is limited, like Raspberry Pi and mobile
platforms. In this tutorial, we will take Raspberry Pi for CPU example
and Firefly-RK3399 for opencl example.
"""
######################################################################
# Build TVM Runtime on Device
# ---------------------------
#
# The first step is to build tvm runtime on the remote device.
#
# .. note::
#
# All instructions in both this section and next section should be
# executed on the target device, e.g. Raspberry Pi. And we assume it
# has Linux running.
#
# Since we do compilation on local machine, the remote device is only used
# for running the generated code. We only need to build tvm runtime on
# the remote device.
#
# .. code-block:: bash
#
# git clone --recursive https://github.com/dmlc/tvm
# cd tvm
# make runtime -j2
#
# After building runtime successfully, we need to set environment variables
# in :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
# using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM
# directory is in :code:`~/tvm`):
#
# .. code-block:: bash
#
# export PYTHONPATH=$PYTHONPATH:~/tvm/python
#
# To update the environment variables, execute :code:`source ~/.bashrc`.
######################################################################
# Set Up RPC Server on Device
# ---------------------------
# To start an RPC server, run the following command on your remote device
# (Which is Raspberry Pi in this example).
#
# .. code-block:: bash
#
# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
#
# If you see the line below, it means the RPC server started
# successfully on your device.
#
# .. code-block:: bash
#
# INFO:root:RPCServer: bind to 0.0.0.0:9090
#
######################################################################
# Declare and Cross Compile Kernel on Local Machine
# -------------------------------------------------
#
# .. note::
#
# Now we back to the local machine, which has a full TVM installed
# (with LLVM).
#
# Here we will declare a simple kernel on the local machine:
import numpy as np
import tvm
from tvm import rpc
from tvm.contrib import util
n = tvm.convert(1024)
A = tvm.placeholder((n,), name='A')
B = tvm.compute((n,), lambda i: A[i] + 1.0, name='B')
s = tvm.create_schedule(B.op)
######################################################################
# Then we cross compile the kernel.
# The target should be 'llvm -target=armv7l-linux-gnueabihf' for
# Raspberry Pi 3B, but we use 'llvm' here to make this tutorial runnable
# on our webpage building server. See the detailed note in the following block.
local_demo = True
if local_demo:
target = 'llvm'
else:
target = 'llvm -target=armv7l-linux-gnueabihf'
func = tvm.build(s, [A, B], target=target, name='add_one')
# save the lib at a local temp folder
temp = util.tempdir()
path = temp.relpath('lib.tar')
func.export_library(path)
######################################################################
# .. note::
#
# To run this tutorial with a real remote device, change :code:`local_demo`
# to False and replace :code:`target` in :code:`build` with the true
# target triple of your device. The target triple which might be
# different for different devices. For example, it is
# :code:`'llvm -target=armv7l-linux-gnueabihf'` for Raspberry Pi 3B and
# :code:`'llvm -target=aarch64-linux-gnu'` for RK3399.
#
# Usually, you can query the target by execute :code:`gcc -v` on your
# device, and look for the line starting with :code:`Target:`
# (Though it may be still a loose configuration.)
#
# Besides :code:`-target`, you can also set other compilation options
# like:
#
# * -mcpu=<cpuname>
# Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture.
# * -mattr=a1,+a2,-a3,...
# Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU.
# To get the list of available attributes, you can do:
#
# .. code-block:: bash
#
# llc -mtriple=<your device target triple> -mattr=help
#
# These options are consistent with `llc <http://llvm.org/docs/CommandGuide/llc.html>`_.
# It is recommended to set target triple and feature set to contain specific
# feature available, so we can take full advantage of the features of the
# board.
# You can find more details about cross compilation attributes from
# `LLVM guide of cross compilation <https://clang.llvm.org/docs/CrossCompilation.html>`_.
######################################################################
# Run CPU Kernel Remotely by RPC
# ------------------------------
# We show how to run the generated cpu kernel on the remote device.
# First we obtain an RPC session from remote device.
if local_demo:
remote = rpc.LocalSession()
else:
# The following is my environment, change this to the IP address of your target device
host = '10.77.1.162'
port = 9090
remote = rpc.connect(host, port)
######################################################################
# Upload the lib to the remote device, then invoke a device local
# compiler to relink them. Now `func` is a remote module object.
remote.upload(path)
func = remote.load_module('lib.tar')
# create arrays on the remote device
ctx = remote.cpu()
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
# the function will run on the remote device
func(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
######################################################################
# When you want to evaluate the performance of the kernel on the remote
# device, it is important to avoid the overhead of network.
# :code:`time_evaluator` will returns a remote function that runs the
# function over number times, measures the cost per run on the remote
# device and returns the measured cost. Network overhead is excluded.
time_f = func.time_evaluator(func.entry_name, ctx, number=10)
cost = time_f(a, b).mean
print('%g secs/op' % cost)
#########################################################################
# Run OpenCL Kernel Remotely by RPC
# ---------------------------------
# As for remote OpenCL devices, the workflow is almost the same as above.
# You can define the kernel, upload files, and run by RPC.
#
# .. note::
#
# Raspberry Pi does not support OpenCL, the following code is tested on
# Firefly-RK3399. You may follow this `tutorial <https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2>`_
# to setup the OS and OpenCL driver for RK3399.
#
# Also we need to build the runtime with OpenCL enabled on rk3399 board. In the tvm
# root directory, execute
#
# .. code-block:: bash
#
# cp cmake/config.cmake .
# sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
# make runtime -j4
#
# The following function shows how we run OpenCL kernel remotely
def run_opencl():
# NOTE: This is the setting for my rk3399 board. You need to modify
# them according to your environment.
target_host = "llvm -target=aarch64-linux-gnu"
opencl_device_host = '10.77.1.145'
opencl_device_port = 9090
# create schedule for the above "add one" compute declaration
s = tvm.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
func = tvm.build(s, [A, B], "opencl", target_host=target_host)
remote = rpc.connect(opencl_device_host, opencl_device_port)
# export and upload
path = temp.relpath('lib_cl.tar')
func.export_library(path)
remote.upload(path)
func = remote.load_module('lib_cl.tar')
# run
ctx = remote.cl()
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
func(a, b)
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
print("OpenCP test passed!")
######################################################################
# Summary
# -------
# This tutorial provides a walk through of cross compilation and RPC
# features in TVM.
#
# - Set up RPC server on the remote device.
# - Set up target device configuration to cross compile kernel on the
# local machine.
# - Upload and run the kernel remotely by RPC API.