src/core/device/platform.cc - singa - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 #ifndef DISABLE_WARNINGS

 #include <iostream>

 #include "singa/core/device.h"
 #include "singa/singa_config.h"
 #include "singa/utils/opencl_utils.h"
 using namespace std;
 namespace singa {

 #ifdef USE_CUDA

 std::vector<std::shared_ptr<Device>> Platform::UsedDevice;
 std::mutex Platform::mtx_;
 int Platform::GetNumGPUs() {
   int count;
   CUDA_CHECK(cudaGetDeviceCount(&count));
   return count;
 }

 bool Platform::CheckDevice(const int device_id) {
   bool r = ((cudaSuccess == cudaSetDevice(device_id)) &&
             (cudaSuccess == cudaFree(0)));
   // reset any error that may have occurred.
   cudaGetLastError();
   return r;
 }

 /// Return the total num of free GPUs
 const vector<int> Platform::GetGPUIDs() {
   vector<int> gpus;
   int count = Platform::GetNumGPUs();
   for (int i = 0; i < count; i++) {
     if (Platform::CheckDevice(i)) {
       gpus.push_back(i);
     }
   }
   return gpus;
 }

 const std::pair<size_t, size_t> Platform::GetGPUMemSize(const int device) {
   std::pair<size_t, size_t> ret{0, 0};
   if (Platform::CheckDevice(device)) {
     CUDA_CHECK(cudaSetDevice(device));
     size_t free = 0, total = 0;
     CUDA_CHECK(cudaMemGetInfo(&free, &total));
     ret = std::make_pair(free, total);
   } else {
     LOG(ERROR) << "The device (ID = " << device << ") is not available";
   }
   return ret;
 }

 const vector<std::pair<size_t, size_t>> Platform::GetGPUMemSize() {
   vector<std::pair<size_t, size_t>> mem;
   int count = Platform::GetNumGPUs();
   for (int i = 0; i < count; i++) {
     mem.push_back(Platform::GetGPUMemSize(i));
   }
   return mem;
 }

 const string Platform::DeviceQuery(int device, bool verbose) {
   if (cudaSuccess != cudaGetDevice(&device)) {
     return "The device (ID = " + std::to_string(device) + " is not available";
   }
   cudaDeviceProp prop;
   CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
   std::ostringstream out;
   out << "Device id:                     " << device << '\n';
   out << "Total global memory:           " << prop.totalGlobalMem << '\n';
   out << "Total shared memory per block: " << prop.sharedMemPerBlock << '\n';
   out << "Maximum threads per block:     " << prop.maxThreadsPerBlock << '\n';
   out << "Maximum dimension of block:    " << prop.maxThreadsDim[0 << '\n']
       << ", " << prop.maxThreadsDim[1] << ", " << prop.maxThreadsDim[2] << '\n';
   out << "Maximum dimension of grid:     " << prop.maxGridSize[0] << ", "
       << "Concurrent copy and execution: "
       << (prop.deviceOverlap ? "Yes" : "No") << '\n';

   if (verbose) {
     out << "Major revision number:         " << prop.major << '\n';
     out << "Minor revision number:         " << prop.minor << '\n';
     out << "Name:                          " << prop.name << '\n';
     out << "Total registers per block:     " << prop.regsPerBlock << '\n';
     out << "Maximum memory pitch:          " << prop.memPitch << '\n';
     out << "Warp size:                     " << prop.warpSize
         << prop.maxGridSize[1] << ", " << prop.maxGridSize[2] << '\n';
     out << "Clock rate:                    " << prop.clockRate << '\n';
     out << "Number of multiprocessors:     " << prop.multiProcessorCount
         << '\n';
     out << "Kernel execution timeout:      "
         << (prop.kernelExecTimeoutEnabled ? "Yes" : "No") << '\n';
   }
   return out.str();
 }

 const vector<shared_ptr<Device>> Platform::CreateCudaGPUs(
     const size_t num_devices, size_t init_size) {
   const vector<int> gpus = GetGPUIDs();
   CHECK_LE(num_devices, gpus.size());
   vector<int> use_gpus(gpus.begin(), gpus.begin() + num_devices);
   return CreateCudaGPUsOn(use_gpus, init_size);
 }

 const vector<shared_ptr<Device>> Platform::CreateCudaGPUsOn(
     const vector<int>& devices, size_t init_size) {
   MemPoolConf conf;
   if (init_size > 0) conf.set_init_size(init_size);
   size_t bytes = conf.init_size() << 20;
   for (auto device : devices) {
     conf.add_device(device);
     CHECK_LE(bytes, Platform::GetGPUMemSize(device).first);
   }
   mtx_.lock();
   if (UsedDevice.size() == 0) {
     int count = Platform::GetNumGPUs();
     for (int i = 0; i < count; i++) UsedDevice.push_back(nullptr);
   }
   auto pool = std::make_shared<CnMemPool>(conf);
   vector<shared_ptr<Device>> ret;
   for (size_t i = 0; i < devices.size(); i++) {
     if (UsedDevice[devices[i]] == nullptr)
       UsedDevice[devices[i]] = std::make_shared<CudaGPU>(devices[i], pool);
     ret.push_back(UsedDevice[devices[i]]);
   }
   mtx_.unlock();
   return ret;
 }

 #endif  // USE_CUDA

 #ifdef USE_OPENCL

 const int Platform::GetNumOpenclPlatforms() {
   auto all_platforms = viennacl::ocl::get_platforms();
   return (int)all_platforms.size();
 }

 const int Platform::GetNumOpenclDevices() {
   auto all_platforms = viennacl::ocl::get_platforms();
   unsigned int total_num_devices = 0;
   for (auto plat : all_platforms) {
     auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
     total_num_devices += all_devices.size();
   }
   return (int)total_num_devices;
 }

 const std::shared_ptr<Device> Platform::GetDefaultOpenclDevice() {
   return std::make_shared<OpenclDevice>();
 }
 /*
 static const std::vector<std::shared_ptr<Device>>
 Platform::CreateOpenclDevices(const size_t num_devices) {
   auto all_platforms = viennacl::ocl::get_platforms();
   for (auto plat : all_platforms) {
     auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
     total_num_devices += all_devices.size();
   }
   return (int)total_num_devices;
 }

 static const std::vector<std::shared_ptr<Device>>
 Platform::CreateOpenclDevices(const std::vector<int> &id) {

 }
 */
 #endif  // USE_OPENCL

 }  // namespace singa

 #endif
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	#ifndef DISABLE_WARNINGS

	#include <iostream>

	#include "singa/core/device.h"
	#include "singa/singa_config.h"
	#include "singa/utils/opencl_utils.h"
	using namespace std;
	namespace singa {

	#ifdef USE_CUDA

	std::vector<std::shared_ptr<Device>> Platform::UsedDevice;
	std::mutex Platform::mtx_;
	int Platform::GetNumGPUs() {
	int count;
	CUDA_CHECK(cudaGetDeviceCount(&count));
	return count;
	}

	bool Platform::CheckDevice(const int device_id) {
	bool r = ((cudaSuccess == cudaSetDevice(device_id)) &&
	(cudaSuccess == cudaFree(0)));
	// reset any error that may have occurred.
	cudaGetLastError();
	return r;
	}

	/// Return the total num of free GPUs
	const vector<int> Platform::GetGPUIDs() {
	vector<int> gpus;
	int count = Platform::GetNumGPUs();
	for (int i = 0; i < count; i++) {
	if (Platform::CheckDevice(i)) {
	gpus.push_back(i);
	}
	}
	return gpus;
	}

	const std::pair<size_t, size_t> Platform::GetGPUMemSize(const int device) {
	std::pair<size_t, size_t> ret{0, 0};
	if (Platform::CheckDevice(device)) {
	CUDA_CHECK(cudaSetDevice(device));
	size_t free = 0, total = 0;
	CUDA_CHECK(cudaMemGetInfo(&free, &total));
	ret = std::make_pair(free, total);
	} else {
	LOG(ERROR) << "The device (ID = " << device << ") is not available";
	}
	return ret;
	}

	const vector<std::pair<size_t, size_t>> Platform::GetGPUMemSize() {
	vector<std::pair<size_t, size_t>> mem;
	int count = Platform::GetNumGPUs();
	for (int i = 0; i < count; i++) {
	mem.push_back(Platform::GetGPUMemSize(i));
	}
	return mem;
	}

	const string Platform::DeviceQuery(int device, bool verbose) {
	if (cudaSuccess != cudaGetDevice(&device)) {
	return "The device (ID = " + std::to_string(device) + " is not available";
	}
	cudaDeviceProp prop;
	CUDA_CHECK(cudaGetDeviceProperties(&prop, device));
	std::ostringstream out;
	out << "Device id: " << device << '\n';
	out << "Total global memory: " << prop.totalGlobalMem << '\n';
	out << "Total shared memory per block: " << prop.sharedMemPerBlock << '\n';
	out << "Maximum threads per block: " << prop.maxThreadsPerBlock << '\n';
	out << "Maximum dimension of block: " << prop.maxThreadsDim[0 << '\n']
	<< ", " << prop.maxThreadsDim[1] << ", " << prop.maxThreadsDim[2] << '\n';
	out << "Maximum dimension of grid: " << prop.maxGridSize[0] << ", "
	<< "Concurrent copy and execution: "
	<< (prop.deviceOverlap ? "Yes" : "No") << '\n';

	if (verbose) {
	out << "Major revision number: " << prop.major << '\n';
	out << "Minor revision number: " << prop.minor << '\n';
	out << "Name: " << prop.name << '\n';
	out << "Total registers per block: " << prop.regsPerBlock << '\n';
	out << "Maximum memory pitch: " << prop.memPitch << '\n';
	out << "Warp size: " << prop.warpSize
	<< prop.maxGridSize[1] << ", " << prop.maxGridSize[2] << '\n';
	out << "Clock rate: " << prop.clockRate << '\n';
	out << "Number of multiprocessors: " << prop.multiProcessorCount
	<< '\n';
	out << "Kernel execution timeout: "
	<< (prop.kernelExecTimeoutEnabled ? "Yes" : "No") << '\n';
	}
	return out.str();
	}

	const vector<shared_ptr<Device>> Platform::CreateCudaGPUs(
	const size_t num_devices, size_t init_size) {
	const vector<int> gpus = GetGPUIDs();
	CHECK_LE(num_devices, gpus.size());
	vector<int> use_gpus(gpus.begin(), gpus.begin() + num_devices);
	return CreateCudaGPUsOn(use_gpus, init_size);
	}

	const vector<shared_ptr<Device>> Platform::CreateCudaGPUsOn(
	const vector<int>& devices, size_t init_size) {
	MemPoolConf conf;
	if (init_size > 0) conf.set_init_size(init_size);
	size_t bytes = conf.init_size() << 20;
	for (auto device : devices) {
	conf.add_device(device);
	CHECK_LE(bytes, Platform::GetGPUMemSize(device).first);
	}
	mtx_.lock();
	if (UsedDevice.size() == 0) {
	int count = Platform::GetNumGPUs();
	for (int i = 0; i < count; i++) UsedDevice.push_back(nullptr);
	}
	auto pool = std::make_shared<CnMemPool>(conf);
	vector<shared_ptr<Device>> ret;
	for (size_t i = 0; i < devices.size(); i++) {
	if (UsedDevice[devices[i]] == nullptr)
	UsedDevice[devices[i]] = std::make_shared<CudaGPU>(devices[i], pool);
	ret.push_back(UsedDevice[devices[i]]);
	}
	mtx_.unlock();
	return ret;
	}

	#endif // USE_CUDA

	#ifdef USE_OPENCL

	const int Platform::GetNumOpenclPlatforms() {
	auto all_platforms = viennacl::ocl::get_platforms();
	return (int)all_platforms.size();
	}

	const int Platform::GetNumOpenclDevices() {
	auto all_platforms = viennacl::ocl::get_platforms();
	unsigned int total_num_devices = 0;
	for (auto plat : all_platforms) {
	auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
	total_num_devices += all_devices.size();
	}
	return (int)total_num_devices;
	}

	const std::shared_ptr<Device> Platform::GetDefaultOpenclDevice() {
	return std::make_shared<OpenclDevice>();
	}
	/*
	static const std::vector<std::shared_ptr<Device>>
	Platform::CreateOpenclDevices(const size_t num_devices) {
	auto all_platforms = viennacl::ocl::get_platforms();
	for (auto plat : all_platforms) {
	auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
	total_num_devices += all_devices.size();
	}
	return (int)total_num_devices;
	}

	static const std::vector<std::shared_ptr<Device>>
	Platform::CreateOpenclDevices(const std::vector<int> &id) {

	}
	*/
	#endif // USE_OPENCL

	} // namespace singa

	#endif