// blob: 3490d290fb43fdc73d08669e36e789260f980682 [file] [log] [blame]
/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
#ifndef SINGA_UTILS_CONTEXT_H_
#define SINGA_UTILS_CONTEXT_H_
#include <glog/logging.h>
#include <chrono>
#include <random>
#include <thread>
#include <unordered_map>
#include <vector>
#ifdef USE_GPU
#include "singa/utils/cuda_utils.h"
#ifdef USE_CUDNN
#include <cudnn.h>
#endif
#endif
namespace singa {
/**
 * Context is used as a global singleton, which stores the mapping from CPU
 * thread id to GPU device id. If a thread has no GPU, then its associated
 * device id is -1. It manages (e.g., creates and destroys) the handles for
 * GPU devices. It also manages the GPU and CPU random generators, which are
 * created lazily on first access. Each CPU thread has its own CPU random
 * generator. Each GPU device has its own GPU random generator, looked up
 * either by device ID or by a CPU thread that was attached to the device via
 * SetupDevice.
 *
 * The object owns raw resources (heap-allocated generators and library
 * handles), so it is not copyable.
 *
 * NOTE(review): no locking is done inside this class although it is meant to
 * be reached from several threads; confirm that callers serialise SetupDevice
 * and the lazy-creation accessors.
 */
class Context {
 public:
  /**
   * Destructor, releases every handle and random generator that was created.
   */
  ~Context() {
#ifdef USE_GPU
    // Walk the handle tables themselves instead of the thread->device map:
    // handles can be created directly by device ID for a device that no
    // thread was attached to, and the map may contain IDs that were never
    // used to create a handle (or that exceed the table size).
    for (int did = 0; did < static_cast<int>(cublas_handle_.size()); ++did) {
      if (cublas_handle_[did] == nullptr) continue;
      cudaSetDevice(did);
      cublasDestroy(cublas_handle_[did]);
      cublas_handle_[did] = nullptr;
    }
    for (int did = 0; did < static_cast<int>(curand_generator_.size());
         ++did) {
      if (curand_generator_[did] == nullptr) continue;
      cudaSetDevice(did);
      curandDestroyGenerator(curand_generator_[did]);
      curand_generator_[did] = nullptr;
    }
#ifdef USE_CUDNN
    for (auto& handle : cudnn_handle_) {
      if (handle != nullptr) {
        CHECK_EQ(cudnnDestroy(handle), CUDNN_STATUS_SUCCESS);
      }
      handle = nullptr;
    }
#endif
#endif
    for (auto& entry : rand_generator_) {
      delete entry.second;  // deleting nullptr is a no-op
      entry.second = nullptr;
    }
  }

  /**
   * Constructor, sizes the per-device handle tables to kMaxNumGPU entries and
   * initialises every entry to nullptr.
   */
  Context() {
#ifdef USE_GPU
    cublas_handle_.assign(kMaxNumGPU, nullptr);
    curand_generator_.assign(kMaxNumGPU, nullptr);
#ifdef USE_CUDNN
    cudnn_handle_.assign(kMaxNumGPU, nullptr);
#endif
#endif
  }

  // The destructor frees the generators and handles, so a copy would release
  // the same resources twice.
  Context(const Context&) = delete;
  Context& operator=(const Context&) = delete;

  /**
   * @return the device ID of the calling thread, see
   * device_id(const std::thread::id&).
   */
  int device_id() {
    return device_id(std::this_thread::get_id());
  }

  /**
   * @param[in] tid CPU thread ID
   * @return the device ID registered for the given CPU thread via SetupDevice
   * (-1 by convention when the thread has no GPU), or -2 if SetupDevice has
   * never been called for this thread.
   */
  int device_id(const std::thread::id& tid) {
    const auto it = device_id_.find(tid);
    return it == device_id_.end() ? -2 : it->second;
  }

  /**
   * Setup the CPU thread, which may be assigned a GPU device.
   * If there is no GPU device, then set did to -1.
   * The random seed is set to -1, i.e., the CPU generator of this thread will
   * be seeded from the system time.
   * @param[in] tid CPU thread ID
   * @param[in] did GPU device ID
   */
  void SetupDevice(const std::thread::id& tid, const int did) {
    SetupDevice(tid, did, -1);
  }

  /**
   * Setup the CPU thread, which may be assigned a GPU device, and record the
   * seed for its CPU random generator. Calling it again for the same thread
   * overwrites the recorded device ID and seed; a CPU generator that already
   * exists for the thread is not re-seeded.
   * @param[in] tid CPU thread ID
   * @param[in] did GPU device ID, -1 if the thread has no GPU
   * @param[in] seed random seed, -1 to seed from the system time
   */
  void SetupDevice(const std::thread::id& tid, const int did, const int seed) {
    device_id_[tid] = did;
    seed_[tid] = seed;
  }

  /**
   * Activate the GPU device by calling cudaSetDevice. The device ID must be
   * non-negative (enforced with CHECK_GE). Without USE_GPU only the check is
   * performed.
   * @param[in] device_id GPU device ID
   */
  void ActivateDevice(const int device_id) {
    CHECK_GE(device_id, 0);
#ifdef USE_GPU
    cudaSetDevice(device_id);
#endif
  }

  /**
   * @return the CPU random generator for the calling thread, see
   * rand_generator(const std::thread::id&).
   */
  std::mt19937* rand_generator() {
    return rand_generator(std::this_thread::get_id());
  }

  /**
   * Get the CPU random generator of a thread.
   * If the generator does not exist, then create it now.
   * If no seed has been recorded for the thread (SetupDevice was not called)
   * or the recorded seed is -1, then the seed is taken from the system time.
   * The returned pointer stays owned by the Context.
   * @param[in] tid CPU thread ID
   * @return the CPU random generator
   */
  std::mt19937* rand_generator(const std::thread::id& tid) {
    const auto gen_it = rand_generator_.find(tid);
    if (gen_it != rand_generator_.end()) return gen_it->second;

    // Look the seed up with find(): operator[] would insert a 0 entry for an
    // unknown thread and silently turn "no seed" into "seed 0".
    const auto seed_it = seed_.find(tid);
    unsigned seed = 0;
    if (seed_it == seed_.end() || seed_it->second == -1) {
      seed = static_cast<unsigned>(
          std::chrono::system_clock::now().time_since_epoch().count());
    } else {
      seed = static_cast<unsigned>(seed_it->second);
    }
    std::mt19937* generator = new std::mt19937(seed);
    rand_generator_[tid] = generator;
    return generator;
  }

#ifdef USE_GPU
  /**
   * @return cublas handle for the calling thread, see
   * cublas_handle(const std::thread::id).
   */
  cublasHandle_t cublas_handle() {
    return cublas_handle(std::this_thread::get_id());
  }

  /**
   * Get the cublas handle of the GPU which is assigned to the given thread.
   * Calls cublas_handle(const int) with the thread's device ID, so the thread
   * must have been attached to a GPU via SetupDevice.
   */
  cublasHandle_t cublas_handle(const std::thread::id thread_id) {
    return cublas_handle(device_id(thread_id));
  }

  /**
   * Get the cublas handle of the GPU device given its device ID. The handle
   * is created on first access.
   * @param[in] device_id GPU device ID, must be non-negative; an ID beyond
   * the handle table makes std::vector::at throw std::out_of_range.
   * @return the cublas handle
   */
  cublasHandle_t cublas_handle(const int device_id) {
    CHECK_GE(device_id, 0);
    if (cublas_handle_.at(device_id) == nullptr) {
      ActivateDevice(device_id);
      // Fail loudly here rather than handing out a null handle.
      CHECK_EQ(cublasCreate(&cublas_handle_[device_id]),
               CUBLAS_STATUS_SUCCESS);
    }
    return cublas_handle_[device_id];
  }

  /**
   * Get the rand generator of the GPU device assigned to the given thread.
   * Calls curand_generator(const int) with the thread's device ID.
   */
  curandGenerator_t curand_generator(const std::thread::id thread_id) {
    return curand_generator(device_id(thread_id));
  }

  /**
   * Get the random generator of the GPU device given the device id.
   * If it does not exist, then it is created with generator type
   * CURAND_RNG_PSEUDO_DEFAULT. The seed recorded by SetupDevice is not
   * applied to the GPU generator yet (see TODO below).
   * @param[in] device_id GPU device ID, must lie inside the generator table
   * @return the GPU random generator
   */
  curandGenerator_t curand_generator(const int device_id) {
    CHECK_GE(device_id, 0);
    // Check against the curand table itself; cudnn_handle_ only exists when
    // USE_CUDNN is defined.
    CHECK_LT(device_id, static_cast<int>(curand_generator_.size()));
    if (curand_generator_[device_id] == nullptr) {
      // TODO(wangwei) handle user set seed
      /*
      CHECK(seed_.find(tid) != seed_.end());
      auto seed = seed_[tid];
      */
      ActivateDevice(device_id);
      CHECK_EQ(curandCreateGenerator(&curand_generator_[device_id],
                                     CURAND_RNG_PSEUDO_DEFAULT),
               CURAND_STATUS_SUCCESS);
    }
    return curand_generator_[device_id];
  }

#ifdef USE_CUDNN
  /**
   * @return cudnn handle for the calling thread.
   */
  cudnnHandle_t cudnn_handle() {
    return cudnn_handle(std::this_thread::get_id());
  }

  /**
   * Get the cudnn handle of the GPU which is assigned to the given thread.
   * Calls cudnn_handle(const int) with the thread's device ID.
   */
  cudnnHandle_t cudnn_handle(const std::thread::id thread_id) {
    return cudnn_handle(device_id(thread_id));
  }

  /**
   * Get the cudnn handle of the GPU device given its device ID. The handle
   * is created on first access.
   * @param[in] device_id GPU device ID, must lie inside the handle table
   * @return the cudnn handle
   */
  cudnnHandle_t cudnn_handle(const int device_id) {
    CHECK_GE(device_id, 0);
    CHECK_LT(device_id, static_cast<int>(cudnn_handle_.size()));
    if (cudnn_handle_[device_id] == nullptr) {
      ActivateDevice(device_id);
      // LOG(ERROR) << "create cudnn handle for device " << device_id;
      CHECK_EQ(cudnnCreate(&cudnn_handle_[device_id]), CUDNN_STATUS_SUCCESS);
    }
    // LOG(ERROR) << "use cudnn handle from device " << device_id;
    return cudnn_handle_[device_id];
  }
#endif
#endif

 protected:
  /// max num of GPUs per process; also the size of every per-device table
  const int kMaxNumGPU = 64;
  /// map from thread id to device id
  std::unordered_map<std::thread::id, int> device_id_;
  /// map from thread id to cpu rand generator (owned, released in destructor)
  std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
  /// map from thread id to cpu rand generator seed
  std::unordered_map<std::thread::id, int> seed_;
#ifdef USE_GPU
  /// cublas handle indexed by GPU device ID
  std::vector<cublasHandle_t> cublas_handle_;
  /// curand generator indexed by GPU device ID
  std::vector<curandGenerator_t> curand_generator_;
#ifdef USE_CUDNN
  /// cudnn handle indexed by GPU device ID
  std::vector<cudnnHandle_t> cudnn_handle_;
#endif
#endif
};
} // namespace singa
#endif // SINGA_UTILS_CONTEXT_H_