// include/singa/utils/context.h - singa - Git at Google

 /************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/

 #ifndef SINGA_UTILS_CONTEXT_H_
 #define SINGA_UTILS_CONTEXT_H_

 #include <glog/logging.h>
 #include <chrono>
 #include <random>
 #include <thread>
 #include <unordered_map>
 #include <vector>

 #ifdef USE_GPU
 #include "singa/utils/cuda_utils.h"

 #ifdef USE_CUDNN
 #include <cudnn.h>
 #endif

 #endif

 namespace singa {

 /**
  * Context is used as a global singleton, which stores the mapping from CPU
  * thread id to GPU device id. If a thread has no GPU, then its associated
  * device id is -1. It manages (e.g., creating) the handles for GPU
  * devices. It also manages the GPU and CPU random generators, which are
  * created lazily on first access. Each CPU thread has its own CPU random
  * generator; each GPU device has one GPU random generator.
  *
  * The object owns the CPU generators (held as raw pointers) and the CUDA
  * library handles it creates, and releases them in its destructor. Do not
  * copy a Context: the copy would hold the same raw pointers/handles and both
  * objects would release them.
  *
  * NOTE(review): no locking is visible in this class; callers that use it
  * from several threads presumably synchronize externally -- confirm.
  */
 class Context {
  public:
   /**
    * Destructor, release random generators and handles.
    */
   ~Context() {
 #ifdef USE_GPU
     // Release by device slot rather than by walking device_id_: a handle can
     // be created through the device-id overloads for a device that no thread
     // is (still) mapped to, and an id stored by SetupDevice is not validated,
     // so it must not be used as an index here.
     const int num_cublas = static_cast<int>(cublas_handle_.size());
     for (int dev = 0; dev < num_cublas; ++dev) {
       if (cublas_handle_[dev] == nullptr) continue;
       cudaSetDevice(dev);
       cublasDestroy(cublas_handle_[dev]);
       cublas_handle_[dev] = nullptr;
     }
     const int num_curand = static_cast<int>(curand_generator_.size());
     for (int dev = 0; dev < num_curand; ++dev) {
       if (curand_generator_[dev] == nullptr) continue;
       cudaSetDevice(dev);
       curandDestroyGenerator(curand_generator_[dev]);
       curand_generator_[dev] = nullptr;
     }
 #ifdef USE_CUDNN
     for (auto& handle : cudnn_handle_) {
       if (handle != nullptr) {
         CHECK_EQ(cudnnDestroy(handle), CUDNN_STATUS_SUCCESS);
         handle = nullptr;
       }
     }
 #endif
 #endif
     for (auto& entry : rand_generator_) {
       delete entry.second;  // deleting a null pointer is a no-op
       entry.second = nullptr;
     }
   }

   /**
    * Constructor, init handles and GPU rand generators to nullptr
    * (kMaxNumGPU slots each).
    */
   Context() {
 #ifdef USE_GPU
     cublas_handle_.assign(kMaxNumGPU, nullptr);
     curand_generator_.assign(kMaxNumGPU, nullptr);
 #ifdef USE_CUDNN
     cudnn_handle_.assign(kMaxNumGPU, nullptr);
 #endif
 #endif
   }

   /**
    * @return the device ID of the current thread, see
    * device_id(const std::thread::id&).
    */
   int device_id() {
     return device_id(std::this_thread::get_id());
   }

   /**
    * @param[in] tid CPU thread ID
    * @return the device ID registered for the thread via SetupDevice (-1 when
    * it was registered without a GPU device), or -2 if the thread has never
    * been registered.
    */
   int device_id(const std::thread::id& tid) {
     const auto it = device_id_.find(tid);
     return it != device_id_.end() ? it->second : -2;
   }

   /**
    * Setup the CPU thread, which may be assigned a GPU device.
    * If there is no GPU device, then set did to -1.
    * The random seed is set to -1, i.e., "not set".
    * @param[in] tid CPU thread ID
    * @param[in] did GPU device ID
    */
   void SetupDevice(const std::thread::id& tid, const int did) {
     SetupDevice(tid, did, -1);
   }

   /**
    * Setup the CPU thread with a device and a seed for its CPU random
    * generator. Overwrites any earlier registration of the same thread.
    * @param[in] tid CPU thread ID
    * @param[in] did GPU device ID, or -1 if there is no GPU device
    * @param[in] seed random seed; -1 means "not set"
    */
   void SetupDevice(const std::thread::id& tid, const int did, const int seed) {
     device_id_[tid] = did;
     seed_[tid] = seed;
   }

   /**
    * Activate the GPU device by calling cudaSetDevice (only when compiled
    * with USE_GPU). The device ID must be non-negative.
    */
   void ActivateDevice(const int device_id) {
     CHECK_GE(device_id, 0);
 #ifdef USE_GPU
     cudaSetDevice(device_id);
 #endif
   }

   /**
    * \copybrief rand_generator(const std::thread::id&)
    * @return the CPU random generator for the calling thread.
    */
   std::mt19937* rand_generator() {
     return rand_generator(std::this_thread::get_id());
   }

   /**
    * Get the CPU random generator.
    * If the generator does not exist, then create it now.
    * If the seed is not set, i.e., the thread has no seed entry or its seed is
    * -1, then get a seed from system time.
    * @param[in] tid CPU thread ID
    * @return the CPU random generator; it stays owned by this Context.
    */
   std::mt19937* rand_generator(const std::thread::id& tid) {
     auto gen_it = rand_generator_.find(tid);
     if (gen_it == rand_generator_.end()) {
       // Look the seed up with find(): operator[] would insert a 0 entry for
       // an unknown thread, making "no seed" indistinguishable from seed 0.
       const auto seed_it = seed_.find(tid);
       unsigned seed = 0;
       if (seed_it == seed_.end() || seed_it->second == -1) {
         seed = static_cast<unsigned>(
             std::chrono::system_clock::now().time_since_epoch().count());
       } else {
         seed = static_cast<unsigned>(seed_it->second);
       }
       gen_it = rand_generator_.emplace(tid, new std::mt19937(seed)).first;
     }
     return gen_it->second;
   }

 #ifdef USE_GPU
   /**
    * \copybrief cublas_handle(const std::thread::id)
    * @return cublas handle for the calling thread.
    */
   cublasHandle_t cublas_handle() {
     return cublas_handle(std::this_thread::get_id());
   }

   /**
    * Get the handle of the GPU which is assigned to the given thread.
    * Calls cublas_handle(const int); fails the check there if the thread has
    * no GPU device (device id < 0).
    */
   cublasHandle_t cublas_handle(const std::thread::id thread_id) {
     return cublas_handle(device_id(thread_id));
   }

   /**
    * Get the handle of the GPU device given its device ID. The device
    * must be set up via SetupDevice(const std::thread::id, const int) before
    * calling this function.
    * @param[in] device_id GPU device ID
    * @return the GPU handle, created on first use.
    */
   cublasHandle_t cublas_handle(const int device_id) {
     CHECK_GE(device_id, 0);
     // at() rejects ids beyond the table with std::out_of_range.
     if (cublas_handle_.at(device_id) == nullptr) {
       cudaSetDevice(device_id);
       cublasCreate(&cublas_handle_[device_id]);
     }
     return cublas_handle_[device_id];
   }

   /**
    * Get the rand generator of the GPU device assigned to the given thread.
    */
   curandGenerator_t curand_generator(const std::thread::id thread_id) {
     return curand_generator(device_id(thread_id));
   }

   /**
    * Get the random generator of the GPU device given the device id.
    * @param[in] device_id GPU device ID
    * @return random generator. If it does not exist, then create one of type
    * CURAND_RNG_PSEUDO_DEFAULT. The seed passed to SetupDevice is not applied
    * to it yet (see the TODO below).
    */
   curandGenerator_t curand_generator(const int device_id) {
     CHECK_GE(device_id, 0);
     // Bound against the curand table itself; cudnn_handle_ is only declared
     // when USE_CUDNN is defined.
     CHECK_LT(device_id, static_cast<int>(curand_generator_.size()));
     if (curand_generator_.at(device_id) == nullptr) {
       // TODO(wangwei) handle user set seed
       //   CHECK(seed_.find(tid) != seed_.end());
       //   auto seed = seed_[tid];
       ActivateDevice(device_id);
       curandCreateGenerator(&curand_generator_[device_id],
                             CURAND_RNG_PSEUDO_DEFAULT);
     }
     return curand_generator_[device_id];
   }

 #ifdef USE_CUDNN
   /**
    * @return cudnn handle of the GPU device assigned to the calling thread.
    */
   cudnnHandle_t cudnn_handle() {
     return cudnn_handle(std::this_thread::get_id());
   }

   /**
    * @return cudnn handle of the GPU device assigned to the given thread.
    */
   cudnnHandle_t cudnn_handle(const std::thread::id thread_id) {
     return cudnn_handle(device_id(thread_id));
   }

   /**
    * Get the cudnn handle of the GPU device given its device ID.
    * @param[in] device_id GPU device ID
    * @return the cudnn handle, created on first use.
    */
   cudnnHandle_t cudnn_handle(const int device_id) {
     CHECK_GE(device_id, 0);
     CHECK_LT(device_id, static_cast<int>(cudnn_handle_.size()));
     if (cudnn_handle_.at(device_id) == nullptr) {
       ActivateDevice(device_id);
       // LOG(ERROR) << "create cudnn handle for device " << device_id;
       CHECK_EQ(cudnnCreate(&cudnn_handle_[device_id]), CUDNN_STATUS_SUCCESS);
     }
     // LOG(ERROR) << "use cudnn handle from device " << device_id;
     return cudnn_handle_[device_id];
   }
 #endif

 #endif

  protected:
   //! max num of GPUs per process
   const int kMaxNumGPU = 64;
   //! map from thread id to device id
   std::unordered_map<std::thread::id, int> device_id_;
   //! map from thread id to cpu rand generator (owned, released in destructor)
   std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
   //! map from thread id to cpu rand generator seed (-1 means not set)
   std::unordered_map<std::thread::id, int> seed_;
 #ifdef USE_GPU
   //! cublas handle indexed by GPU device ID
   std::vector<cublasHandle_t> cublas_handle_;
   //! curand generator indexed by GPU device ID
   std::vector<curandGenerator_t> curand_generator_;

 #ifdef USE_CUDNN
   //! cudnn handle indexed by GPU device ID
   std::vector<cudnnHandle_t> cudnn_handle_;
 #endif
 #endif
 };

 }  // namespace singa

 #endif  // SINGA_UTILS_CONTEXT_H_
	/************************************************************
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*************************************************************/

	#ifndef SINGA_UTILS_CONTEXT_H_
	#define SINGA_UTILS_CONTEXT_H_

	#include <glog/logging.h>
	#include <chrono>
	#include <random>
	#include <thread>
	#include <unordered_map>
	#include <vector>

	#ifdef USE_GPU
	#include "singa/utils/cuda_utils.h"

	#ifdef USE_CUDNN
	#include <cudnn.h>
	#endif

	#endif

	namespace singa {

	/**
	 * Context is used as a global singleton, which stores the mapping from CPU
	 * thread id to GPU device id. If a thread has no GPU, then its associated
	 * device id is -1. It manages (e.g., creating) the handles for GPU
	 * devices. It also manages the GPU and CPU random generators, which are
	 * created lazily on first access. Each CPU thread has its own CPU random
	 * generator; each GPU device has one GPU random generator.
	 *
	 * The object owns the CPU generators (held as raw pointers) and the CUDA
	 * library handles it creates, and releases them in its destructor. Do not
	 * copy a Context: the copy would hold the same raw pointers/handles and both
	 * objects would release them.
	 *
	 * NOTE(review): no locking is visible in this class; callers that use it
	 * from several threads presumably synchronize externally -- confirm.
	 */
	class Context {
	 public:
	  /**
	   * Destructor, release random generators and handles.
	   */
	  ~Context() {
	#ifdef USE_GPU
	    // Release by device slot rather than by walking device_id_: a handle can
	    // be created through the device-id overloads for a device that no thread
	    // is (still) mapped to, and an id stored by SetupDevice is not validated,
	    // so it must not be used as an index here.
	    const int num_cublas = static_cast<int>(cublas_handle_.size());
	    for (int dev = 0; dev < num_cublas; ++dev) {
	      if (cublas_handle_[dev] == nullptr) continue;
	      cudaSetDevice(dev);
	      cublasDestroy(cublas_handle_[dev]);
	      cublas_handle_[dev] = nullptr;
	    }
	    const int num_curand = static_cast<int>(curand_generator_.size());
	    for (int dev = 0; dev < num_curand; ++dev) {
	      if (curand_generator_[dev] == nullptr) continue;
	      cudaSetDevice(dev);
	      curandDestroyGenerator(curand_generator_[dev]);
	      curand_generator_[dev] = nullptr;
	    }
	#ifdef USE_CUDNN
	    for (auto& handle : cudnn_handle_) {
	      if (handle != nullptr) {
	        CHECK_EQ(cudnnDestroy(handle), CUDNN_STATUS_SUCCESS);
	        handle = nullptr;
	      }
	    }
	#endif
	#endif
	    for (auto& entry : rand_generator_) {
	      delete entry.second;  // deleting a null pointer is a no-op
	      entry.second = nullptr;
	    }
	  }

	  /**
	   * Constructor, init handles and GPU rand generators to nullptr
	   * (kMaxNumGPU slots each).
	   */
	  Context() {
	#ifdef USE_GPU
	    cublas_handle_.assign(kMaxNumGPU, nullptr);
	    curand_generator_.assign(kMaxNumGPU, nullptr);
	#ifdef USE_CUDNN
	    cudnn_handle_.assign(kMaxNumGPU, nullptr);
	#endif
	#endif
	  }

	  /**
	   * @return the device ID of the current thread, see
	   * device_id(const std::thread::id&).
	   */
	  int device_id() {
	    return device_id(std::this_thread::get_id());
	  }

	  /**
	   * @param[in] tid CPU thread ID
	   * @return the device ID registered for the thread via SetupDevice (-1 when
	   * it was registered without a GPU device), or -2 if the thread has never
	   * been registered.
	   */
	  int device_id(const std::thread::id& tid) {
	    const auto it = device_id_.find(tid);
	    return it != device_id_.end() ? it->second : -2;
	  }

	  /**
	   * Setup the CPU thread, which may be assigned a GPU device.
	   * If there is no GPU device, then set did to -1.
	   * The random seed is set to -1, i.e., "not set".
	   * @param[in] tid CPU thread ID
	   * @param[in] did GPU device ID
	   */
	  void SetupDevice(const std::thread::id& tid, const int did) {
	    SetupDevice(tid, did, -1);
	  }

	  /**
	   * Setup the CPU thread with a device and a seed for its CPU random
	   * generator. Overwrites any earlier registration of the same thread.
	   * @param[in] tid CPU thread ID
	   * @param[in] did GPU device ID, or -1 if there is no GPU device
	   * @param[in] seed random seed; -1 means "not set"
	   */
	  void SetupDevice(const std::thread::id& tid, const int did, const int seed) {
	    device_id_[tid] = did;
	    seed_[tid] = seed;
	  }

	  /**
	   * Activate the GPU device by calling cudaSetDevice (only when compiled
	   * with USE_GPU). The device ID must be non-negative.
	   */
	  void ActivateDevice(const int device_id) {
	    CHECK_GE(device_id, 0);
	#ifdef USE_GPU
	    cudaSetDevice(device_id);
	#endif
	  }

	  /**
	   * \copybrief rand_generator(const std::thread::id&)
	   * @return the CPU random generator for the calling thread.
	   */
	  std::mt19937* rand_generator() {
	    return rand_generator(std::this_thread::get_id());
	  }

	  /**
	   * Get the CPU random generator.
	   * If the generator does not exist, then create it now.
	   * If the seed is not set, i.e., the thread has no seed entry or its seed is
	   * -1, then get a seed from system time.
	   * @param[in] tid CPU thread ID
	   * @return the CPU random generator; it stays owned by this Context.
	   */
	  std::mt19937* rand_generator(const std::thread::id& tid) {
	    auto gen_it = rand_generator_.find(tid);
	    if (gen_it == rand_generator_.end()) {
	      // Look the seed up with find(): operator[] would insert a 0 entry for
	      // an unknown thread, making "no seed" indistinguishable from seed 0.
	      const auto seed_it = seed_.find(tid);
	      unsigned seed = 0;
	      if (seed_it == seed_.end() || seed_it->second == -1) {
	        seed = static_cast<unsigned>(
	            std::chrono::system_clock::now().time_since_epoch().count());
	      } else {
	        seed = static_cast<unsigned>(seed_it->second);
	      }
	      gen_it = rand_generator_.emplace(tid, new std::mt19937(seed)).first;
	    }
	    return gen_it->second;
	  }

	#ifdef USE_GPU
	  /**
	   * \copybrief cublas_handle(const std::thread::id)
	   * @return cublas handle for the calling thread.
	   */
	  cublasHandle_t cublas_handle() {
	    return cublas_handle(std::this_thread::get_id());
	  }

	  /**
	   * Get the handle of the GPU which is assigned to the given thread.
	   * Calls cublas_handle(const int); fails the check there if the thread has
	   * no GPU device (device id < 0).
	   */
	  cublasHandle_t cublas_handle(const std::thread::id thread_id) {
	    return cublas_handle(device_id(thread_id));
	  }

	  /**
	   * Get the handle of the GPU device given its device ID. The device
	   * must be set up via SetupDevice(const std::thread::id, const int) before
	   * calling this function.
	   * @param[in] device_id GPU device ID
	   * @return the GPU handle, created on first use.
	   */
	  cublasHandle_t cublas_handle(const int device_id) {
	    CHECK_GE(device_id, 0);
	    // at() rejects ids beyond the table with std::out_of_range.
	    if (cublas_handle_.at(device_id) == nullptr) {
	      cudaSetDevice(device_id);
	      cublasCreate(&cublas_handle_[device_id]);
	    }
	    return cublas_handle_[device_id];
	  }

	  /**
	   * Get the rand generator of the GPU device assigned to the given thread.
	   */
	  curandGenerator_t curand_generator(const std::thread::id thread_id) {
	    return curand_generator(device_id(thread_id));
	  }

	  /**
	   * Get the random generator of the GPU device given the device id.
	   * @param[in] device_id GPU device ID
	   * @return random generator. If it does not exist, then create one of type
	   * CURAND_RNG_PSEUDO_DEFAULT. The seed passed to SetupDevice is not applied
	   * to it yet (see the TODO below).
	   */
	  curandGenerator_t curand_generator(const int device_id) {
	    CHECK_GE(device_id, 0);
	    // Bound against the curand table itself; cudnn_handle_ is only declared
	    // when USE_CUDNN is defined.
	    CHECK_LT(device_id, static_cast<int>(curand_generator_.size()));
	    if (curand_generator_.at(device_id) == nullptr) {
	      // TODO(wangwei) handle user set seed
	      //   CHECK(seed_.find(tid) != seed_.end());
	      //   auto seed = seed_[tid];
	      ActivateDevice(device_id);
	      curandCreateGenerator(&curand_generator_[device_id],
	                            CURAND_RNG_PSEUDO_DEFAULT);
	    }
	    return curand_generator_[device_id];
	  }

	#ifdef USE_CUDNN
	  /**
	   * @return cudnn handle of the GPU device assigned to the calling thread.
	   */
	  cudnnHandle_t cudnn_handle() {
	    return cudnn_handle(std::this_thread::get_id());
	  }

	  /**
	   * @return cudnn handle of the GPU device assigned to the given thread.
	   */
	  cudnnHandle_t cudnn_handle(const std::thread::id thread_id) {
	    return cudnn_handle(device_id(thread_id));
	  }

	  /**
	   * Get the cudnn handle of the GPU device given its device ID.
	   * @param[in] device_id GPU device ID
	   * @return the cudnn handle, created on first use.
	   */
	  cudnnHandle_t cudnn_handle(const int device_id) {
	    CHECK_GE(device_id, 0);
	    CHECK_LT(device_id, static_cast<int>(cudnn_handle_.size()));
	    if (cudnn_handle_.at(device_id) == nullptr) {
	      ActivateDevice(device_id);
	      // LOG(ERROR) << "create cudnn handle for device " << device_id;
	      CHECK_EQ(cudnnCreate(&cudnn_handle_[device_id]), CUDNN_STATUS_SUCCESS);
	    }
	    // LOG(ERROR) << "use cudnn handle from device " << device_id;
	    return cudnn_handle_[device_id];
	  }
	#endif

	#endif

	 protected:
	  //! max num of GPUs per process
	  const int kMaxNumGPU = 64;
	  //! map from thread id to device id
	  std::unordered_map<std::thread::id, int> device_id_;
	  //! map from thread id to cpu rand generator (owned, released in destructor)
	  std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
	  //! map from thread id to cpu rand generator seed (-1 means not set)
	  std::unordered_map<std::thread::id, int> seed_;
	#ifdef USE_GPU
	  //! cublas handle indexed by GPU device ID
	  std::vector<cublasHandle_t> cublas_handle_;
	  //! curand generator indexed by GPU device ID
	  std::vector<curandGenerator_t> curand_generator_;

	#ifdef USE_CUDNN
	  //! cudnn handle indexed by GPU device ID
	  std::vector<cudnnHandle_t> cudnn_handle_;
	#endif
	#endif
	};

	} // namespace singa

	#endif // SINGA_UTILS_CONTEXT_H_