| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| """ |
| .. _tutorial-cross-compilation-and-rpc: |
| |
| Cross Compilation and RPC |
| ========================= |
| **Author**: `Ziheng Jiang <https://github.com/ZihengJiang/>`_, `Lianmin Zheng <https://github.com/merrymercy/>`_ |
| |
| This tutorial introduces cross compilation and remote device |
| execution with RPC in TVM. |
| |
| With cross compilation and RPC, you can **compile a program on your |
| local machine then run it on the remote device**. It is useful when |
| the remote device resource are limited, like Raspberry Pi and mobile |
| platforms. In this tutorial, we will use the Raspberry Pi for a CPU example |
| and the Firefly-RK3399 for an OpenCL example. |
| """ |
| |
| ###################################################################### |
| # Build TVM Runtime on Device |
| # --------------------------- |
| # |
| # The first step is to build the TVM runtime on the remote device. |
| # |
| # .. note:: |
| # |
| # All instructions in both this section and the next section should be |
| # executed on the target device, e.g. Raspberry Pi. We assume the target |
| # is running Linux. |
| # |
| # Since we do compilation on the local machine, the remote device is only used |
| # for running the generated code. We only need to build the TVM runtime on |
| # the remote device. |
| # |
| # .. code-block:: bash |
| # |
| # git clone --recursive https://github.com/apache/incubator-tvm tvm |
| # cd tvm |
| # make runtime -j2 |
| # |
| # After building the runtime successfully, we need to set environment variables |
| # in :code:`~/.bashrc` file. We can edit :code:`~/.bashrc` |
| # using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM |
| # directory is in :code:`~/tvm`): |
| # |
| # .. code-block:: bash |
| # |
| # export PYTHONPATH=$PYTHONPATH:~/tvm/python |
| # |
| # To update the environment variables, execute :code:`source ~/.bashrc`. |
| |
| ###################################################################### |
| # Set Up RPC Server on Device |
| # --------------------------- |
| # To start an RPC server, run the following command on your remote device |
| # (Which is Raspberry Pi in this example). |
| # |
| # .. code-block:: bash |
| # |
| # python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090 |
| # |
| # If you see the line below, it means the RPC server started |
| # successfully on your device. |
| # |
| # .. code-block:: bash |
| # |
| # INFO:root:RPCServer: bind to 0.0.0.0:9090 |
| # |
| |
| ###################################################################### |
| # Declare and Cross Compile Kernel on Local Machine |
| # ------------------------------------------------- |
| # |
| # .. note:: |
| # |
| # Now we go back to the local machine, which has a full TVM installed |
| # (with LLVM). |
| # |
| # Here we will declare a simple kernel on the local machine: |
| |
| import numpy as np |
| |
| import tvm |
| from tvm import te |
| from tvm import rpc |
| from tvm.contrib import util |
| |
| n = tvm.runtime.convert(1024) |
| A = te.placeholder((n,), name="A") |
| B = te.compute((n,), lambda i: A[i] + 1.0, name="B") |
| s = te.create_schedule(B.op) |
| |
| ###################################################################### |
| # Then we cross compile the kernel. |
| # The target should be 'llvm -mtriple=armv7l-linux-gnueabihf' for |
| # Raspberry Pi 3B, but we use 'llvm' here to make this tutorial runnable |
| # on our webpage building server. See the detailed note in the following block. |
| |
| local_demo = True |
| |
| if local_demo: |
| target = "llvm" |
| else: |
| target = "llvm -mtriple=armv7l-linux-gnueabihf" |
| |
| func = tvm.build(s, [A, B], target=target, name="add_one") |
| # save the lib at a local temp folder |
| temp = util.tempdir() |
| path = temp.relpath("lib.tar") |
| func.export_library(path) |
| |
| ###################################################################### |
| # .. note:: |
| # |
| # To run this tutorial with a real remote device, change :code:`local_demo` |
| # to False and replace :code:`target` in :code:`build` with the appropriate |
| # target triple for your device. The target triple which might be |
| # different for different devices. For example, it is |
| # :code:`'llvm -mtriple=armv7l-linux-gnueabihf'` for Raspberry Pi 3B and |
| # :code:`'llvm -mtriple=aarch64-linux-gnu'` for RK3399. |
| # |
| # Usually, you can query the target by running :code:`gcc -v` on your |
| # device, and looking for the line starting with :code:`Target:` |
| # (Though it may still be a loose configuration.) |
| # |
| # Besides :code:`-mtriple`, you can also set other compilation options |
| # like: |
| # |
| # * -mcpu=<cpuname> |
| # Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture. |
| # * -mattr=a1,+a2,-a3,... |
| # Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU. |
| # To get the list of available attributes, you can do: |
| # |
| # .. code-block:: bash |
| # |
| # llc -mtriple=<your device target triple> -mattr=help |
| # |
| # These options are consistent with `llc <http://llvm.org/docs/CommandGuide/llc.html>`_. |
| # It is recommended to set target triple and feature set to contain specific |
| # feature available, so we can take full advantage of the features of the |
| # board. |
| # You can find more details about cross compilation attributes from |
| # `LLVM guide of cross compilation <https://clang.llvm.org/docs/CrossCompilation.html>`_. |
| |
| ###################################################################### |
| # Run CPU Kernel Remotely by RPC |
| # ------------------------------ |
| # We show how to run the generated CPU kernel on the remote device. |
| # First we obtain an RPC session from remote device. |
| |
| if local_demo: |
| remote = rpc.LocalSession() |
| else: |
| # The following is my environment, change this to the IP address of your target device |
| host = "10.77.1.162" |
| port = 9090 |
| remote = rpc.connect(host, port) |
| |
| ###################################################################### |
| # Upload the lib to the remote device, then invoke a device local |
| # compiler to relink them. Now `func` is a remote module object. |
| |
| remote.upload(path) |
| func = remote.load_module("lib.tar") |
| |
| # create arrays on the remote device |
| ctx = remote.cpu() |
| a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx) |
| b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) |
| # the function will run on the remote device |
| func(a, b) |
| np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) |
| |
| ###################################################################### |
| # When you want to evaluate the performance of the kernel on the remote |
| # device, it is important to avoid the overhead of network. |
| # :code:`time_evaluator` will returns a remote function that runs the |
| # function over number times, measures the cost per run on the remote |
| # device and returns the measured cost. Network overhead is excluded. |
| |
| time_f = func.time_evaluator(func.entry_name, ctx, number=10) |
| cost = time_f(a, b).mean |
| print("%g secs/op" % cost) |
| |
| ######################################################################### |
| # Run OpenCL Kernel Remotely by RPC |
| # --------------------------------- |
| # For remote OpenCL devices, the workflow is almost the same as above. |
| # You can define the kernel, upload files, and run via RPC. |
| # |
| # .. note:: |
| # |
| # Raspberry Pi does not support OpenCL, the following code is tested on |
| # Firefly-RK3399. You may follow this `tutorial <https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2>`_ |
| # to setup the OS and OpenCL driver for RK3399. |
| # |
| # Also we need to build the runtime with OpenCL enabled on rk3399 board. In the TVM |
| # root directory, execute |
| # |
| # .. code-block:: bash |
| # |
| # cp cmake/config.cmake . |
| # sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake |
| # make runtime -j4 |
| # |
| # The following function shows how we run an OpenCL kernel remotely |
| |
| |
| def run_opencl(): |
| # NOTE: This is the setting for my rk3399 board. You need to modify |
| # them according to your environment. |
| target_host = "llvm -mtriple=aarch64-linux-gnu" |
| opencl_device_host = "10.77.1.145" |
| opencl_device_port = 9090 |
| |
| # create schedule for the above "add one" compute declaration |
| s = te.create_schedule(B.op) |
| xo, xi = s[B].split(B.op.axis[0], factor=32) |
| s[B].bind(xo, te.thread_axis("blockIdx.x")) |
| s[B].bind(xi, te.thread_axis("threadIdx.x")) |
| func = tvm.build(s, [A, B], "opencl", target_host=target_host) |
| |
| remote = rpc.connect(opencl_device_host, opencl_device_port) |
| |
| # export and upload |
| path = temp.relpath("lib_cl.tar") |
| func.export_library(path) |
| remote.upload(path) |
| func = remote.load_module("lib_cl.tar") |
| |
| # run |
| ctx = remote.cl() |
| a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx) |
| b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) |
| func(a, b) |
| np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) |
| print("OpenCL test passed!") |
| |
| |
| ###################################################################### |
| # Summary |
| # ------- |
| # This tutorial provides a walk through of cross compilation and RPC |
| # features in TVM. |
| # |
| # - Set up an RPC server on the remote device. |
| # - Set up the target device configuration to cross compile the kernels on the |
| # local machine. |
| # - Upload and run the kernels remotely via the RPC API. |