| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file tvm/runtime/threading_backend.h |
| * \brief Utilities for manipulating thread pool threads. |
| */ |
| #ifndef TVM_RUNTIME_THREADING_BACKEND_H_ |
| #define TVM_RUNTIME_THREADING_BACKEND_H_ |
| |
| #include <tvm/runtime/c_backend_api.h> |
| |
| #include <algorithm> |
| #include <functional> |
| #include <memory> |
| #include <vector> |
| |
#if defined(__linux__) || defined(__ANDROID__)
#if defined(__ANDROID__)
#ifndef CPU_SET
// Fallback definitions of cpu_set_t and the CPU_* helper macros, used only
// when building for Android and CPU_SET has not already been defined.
#include <stdint.h>  // uint64_t
#include <string.h>  // memset, memcmp

#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof(uint64_t))
typedef struct {
  uint64_t __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

// The mask is built from a 64-bit literal (1ULL). `unsigned long` is only
// 32 bits wide on 32-bit ABIs, so `1UL << n` cannot address bits 32..63 of a
// 64-bit word there.
#define CPU_SET(cpu, cpusetp) \
  ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1ULL << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
// Fully parenthesised so the macro can be negated or combined with other
// operators (e.g. `!CPU_ISSET(...)`) without changing its meaning.
#define CPU_ISSET(cpu, cpusetp) \
  ((((cpusetp)->__bits[(cpu) / __NCPUBITS]) & (1ULL << ((cpu) % __NCPUBITS))) != 0)
#define CPU_EQUAL(left, right) (memcmp(&(left), &(right), sizeof(cpu_set_t)) == 0)

#endif  // CPU_SET
#endif  // __ANDROID__
#endif  // __linux__ || __ANDROID__
| |
| namespace tvm { |
| namespace runtime { |
| namespace threading { |
| |
| /*! |
| * \brief A platform-agnostic abstraction for managing a collection of |
| * thread pool threads. |
| */ |
| class ThreadGroup { |
| public: |
| class Impl; |
| |
| /*! |
| * \brief Creates a collection of threads which run a provided function. |
| * |
| * \param num_workers The total number of worker threads in this group. |
| Includes main thread if `exclude_worker0 = true` |
| * \param worker_callback A callback which is run in its own thread. |
| Receives the worker_id as an argument. |
| * \param exclude_worker0 Whether to use the main thread as a worker. |
| * If `true`, worker0 will not be launched in a new thread and |
| * `worker_callback` will only be called for values >= 1. This |
| * allows use of the main thread as a worker. |
| */ |
| TVM_RUNTIME_DLL ThreadGroup(int num_workers, std::function<void(int)> worker_callback, |
| bool exclude_worker0 = false); |
| TVM_RUNTIME_DLL ~ThreadGroup(); |
| |
| /*! |
| * \brief Blocks until all non-main threads in the pool finish. |
| */ |
| TVM_RUNTIME_DLL void Join(); |
| |
| enum AffinityMode : int { |
| kBig = 1, |
| kLittle = -1, |
| /*Different threads will get different affinities.*/ |
| kSpecifyOneCorePerThread = -2, |
| /*All threads will get the same core group affinity.*/ |
| kSpecifyThreadShareAllCore = -3, |
| }; |
| /*! |
| * \brief configure the CPU id affinity |
| * |
| * \param mode The preferred CPU type (1 = big, -1 = little ...). |
| * \param nthreads The number of threads to use (0 = use all). |
| * \param exclude_worker0 Whether to use the main thread as a worker. |
| * If `true`, worker0 will not be launched in a new thread and |
| * `worker_callback` will only be called for values >= 1. This |
| * allows use of the main thread as a worker. |
| * \param cpus A list of CPU used to set 'cpu affinity'. |
| * |
| * \return The number of workers to use. |
| */ |
| TVM_RUNTIME_DLL int Configure(AffinityMode mode, int nthreads, bool exclude_worker0, |
| std::vector<unsigned int> cpus = {}); |
| |
| private: |
| Impl* impl_; |
| }; |
| |
| /*! |
| * \brief Platform-agnostic no-op. |
| */ |
| TVM_RUNTIME_DLL void YieldThread(); |
| /*! |
| * \return the maximum number of effective workers for this system. |
| */ |
| TVM_RUNTIME_DLL int MaxConcurrency(); |
| /*! |
| * \brief Setting the maximum number of available cores. |
| */ |
| TVM_RUNTIME_DLL void SetMaxConcurrency(int value); |
| /*! |
| * \brief Reset the threads in the pool. All current threads are destroyed and |
| * new ones are created. |
| * |
| * Note that this does nothing when openmp is used. |
| */ |
| TVM_RUNTIME_DLL void ResetThreadPool(); |
| |
| /*! |
| * \brief Configuring the CPU affinity mode for the working threads. |
| * \param mode The preferred CPU type (1 = big, -1 = little, -2 = kSpecifyOneCorePerThread, |
| * -3 = kSpecifyThreadShareAllCore). |
| * \param nthreads The number of threads to use (0 = use all). |
| * \param cpus A list of CPUs is used to set the 'cpu affinity' for the worker threads. |
| */ |
| TVM_RUNTIME_DLL void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, |
| int nthreads, std::vector<unsigned int> cpus); |
| |
| /*! |
| * \brief Get the number of threads being used by the TVM runtime |
| * \returns The number of threads used. |
| */ |
| TVM_RUNTIME_DLL int32_t NumThreads(); |
| |
| } // namespace threading |
| |
/*!
 * \brief Run a callable over an index range in parallel, using the TVM
 *        threading backend.
 * \tparam T Type of the callable. It is invoked with one index argument,
 *         which is passed as an int64_t (e.g. "void (int i)").
 * \param flambda The callable to execute for every index of the range.
 * \param begin First index of the parallel loop (inclusive).
 * \param end One past the last index of the parallel loop (exclusive).
 *
 * Example: the sequential loop
 * \code
 *   for (int i = 0; i < 10; i++) {
 *     a[i] = i;
 *   }
 * \endcode
 * should behave the same as
 * \code
 *   parallel_for_with_threading_backend([&a](int i) {
 *     a[i] = i;
 *   }, 0, 10);
 * \endcode
 */
template <typename T>
inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end);
| |
| namespace detail { |
| |
// The detailed implementation of `parallel_for_with_threading_backend`.
// Because these are templates, their definitions must be visible to every
// caller, so they live in this header rather than in a .cc file.
| |
| template <typename T> |
| struct ParallelForWithThreadingBackendLambdaInvoker { |
| static int TVMParallelLambdaInvoke(int task_id, TVMParallelGroupEnv* penv, void* cdata) { |
| int num_task = penv->num_task; |
| // Convert void* back to lambda type. |
| T* lambda_ptr = static_cast<T*>(cdata); |
| // Invoke the lambda with the task id (thread id). |
| (*lambda_ptr)(task_id, num_task); |
| return 0; |
| } |
| }; |
| |
| template <typename T> |
| inline void parallel_launch_with_threading_backend(T flambda) { |
| // Launch the lambda by passing its address. |
| void* cdata = &flambda; |
| TVMBackendParallelLaunch(ParallelForWithThreadingBackendLambdaInvoker<T>::TVMParallelLambdaInvoke, |
| cdata, /*num_task=*/0); |
| } |
| |
| } // namespace detail |
| |
| template <typename T> |
| inline void parallel_for_with_threading_backend(T flambda, int64_t begin, int64_t end) { |
| if (end - begin == 1) { |
| flambda(begin); |
| return; |
| } |
| |
| auto flaunch = [begin, end, flambda](int task_id, int num_task) { |
| // For each thread, do static division and call into flambda. |
| int64_t total_len = end - begin; |
| int64_t step = (total_len + num_task - 1) / num_task; |
| int64_t local_begin = std::min(begin + step * task_id, end); |
| int64_t local_end = std::min(local_begin + step, end); |
| for (int64_t i = local_begin; i < local_end; ++i) { |
| flambda(i); |
| } |
| }; |
| // Launch with all threads. |
| detail::parallel_launch_with_threading_backend(flaunch); |
| } |
| |
| } // namespace runtime |
| } // namespace tvm |
| |
| #endif // TVM_RUNTIME_THREADING_BACKEND_H_ |