/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
/**
* The code is adapted from Caffe whose license is attached.
*
* COPYRIGHT
* All contributions by the University of California:
* Copyright (c) 2014, The Regents of the University of California (Regents)
* All rights reserved.
* All other contributions:
* Copyright (c) 2014, the respective contributors
* All rights reserved.
* Caffe uses a shared copyright model: each contributor holds copyright over
* their contributions to Caffe. The project versioning records all such
* contribution and copyright details. If a contributor wants to further mark
* their specific copyright on a particular contribution, they should indicate
* their copyright solely in the commit message of the change when it is
* committed.
* LICENSE
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* CONTRIBUTION AGREEMENT
* By contributing to the BVLC/caffe repository through pull-request, comment,
* or otherwise, the contributor releases their content to the
* license and copyright terms herein.
*/
#include "utils/blob.h"
#include <cblas.h>
#include <math.h>
#include <utility>
#define NOT_IMPLEMENTED LOG(FATAL) << "Not implemented"
#define NO_GPU LOG(FATAL) << "CPU-only Mode: cannot make GPU call."
// Instantiate a class with float and double specifications.
#define INSTANTIATE_CLASS(classname) \
template class classname<float>; \
template class classname<double>
// Disable the copy and assignment operator for a class.
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:\
classname(const classname&);\
classname& operator=(const classname&)
#ifndef CPU_ONLY
// CUDA: various checks for different function calls.
#define CUDA_CHECK(condition) \
/* Code block avoids redefinition of cudaError_t error */ \
do { \
cudaError_t error = condition; \
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
} while (0)
#define CUBLAS_CHECK(condition) \
do { \
cublasStatus_t status = condition; \
CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \
<< caffe::cublasGetErrorString(status); \
} while (0)
#define CURAND_CHECK(condition) \
do { \
curandStatus_t status = condition; \
CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \
<< caffe::curandGetErrorString(status); \
} while (0)
#endif // CPU_ONLY
namespace singa {
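// Release the host buffer if this object owns it, and the device buffer if
// one was allocated.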
SyncedMemory::~SyncedMemory() {
if (cpu_ptr_ && own_cpu_data_) {
FreeHost(cpu_ptr_);
}
#ifndef CPU_ONLY
if (gpu_ptr_) {
CUDA_CHECK(cudaFree(gpu_ptr_));
}
#endif // CPU_ONLY
}
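// Read-only accessors: lazily synchronize the data to the requested side
// before returning a pointer. gpu_data() aborts in CPU-only builds.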
const void* SyncedMemory::cpu_data() {
to_cpu();
return cpu_ptr_;
}
const void* SyncedMemory::gpu_data() {
#ifndef CPU_ONLY
to_gpu();
return gpu_ptr_;
#else
NO_GPU;
#endif
return nullptr;
}
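// Mutable accessors: synchronize first, then mark the returned side as the
// most recent copy so the other side is refreshed on its next access.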
void* SyncedMemory::mutable_cpu_data() {
to_cpu();
head_ = HEAD_AT_CPU;
return cpu_ptr_;
}
void* SyncedMemory::mutable_gpu_data() {
#ifndef CPU_ONLY
to_gpu();
head_ = HEAD_AT_GPU;
return gpu_ptr_;
#else
NO_GPU;
#endif
return nullptr;
}
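// Use an externally allocated host buffer; the caller keeps ownership, so any
// previously owned host buffer is released here.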
void SyncedMemory::set_cpu_data(void* data) {
CHECK(data);
if (own_cpu_data_) {
FreeHost(cpu_ptr_);
}
cpu_ptr_ = data;
head_ = HEAD_AT_CPU;
own_cpu_data_ = false;
}
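// Ensure the data is valid on the host: allocate and zero-fill on first use,
// copy back from the device when the latest copy lives there, and do nothing
// if the host copy is already up to date.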
void SyncedMemory::to_cpu() {
switch (head_) {
case UNINITIALIZED:
MallocHost(&cpu_ptr_, size_);
memset(cpu_ptr_, 0, size_);
head_ = HEAD_AT_CPU;
own_cpu_data_ = true;
break;
case HEAD_AT_GPU:
#ifndef CPU_ONLY
if (cpu_ptr_ == NULL) {
MallocHost(&cpu_ptr_, size_);
own_cpu_data_ = true;
}
CUDA_CHECK(cudaMemcpy(cpu_ptr_, gpu_ptr_, size_, cudaMemcpyDefault));
head_ = SYNCED;
#else
NO_GPU;
#endif
break;
case HEAD_AT_CPU:
case SYNCED:
break;
}
}
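// Ensure the data is valid on the device; mirrors to_cpu() for the GPU side.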
void SyncedMemory::to_gpu() {
#ifndef CPU_ONLY
switch (head_) {
case UNINITIALIZED:
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    CUDA_CHECK(cudaMemset(gpu_ptr_, 0, size_));  // zero-fill the new device buffer
head_ = HEAD_AT_GPU;
break;
case HEAD_AT_CPU:
if (gpu_ptr_ == NULL) {
CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
}
CUDA_CHECK(cudaMemcpy(gpu_ptr_, cpu_ptr_, size_, cudaMemcpyDefault));
head_ = SYNCED;
break;
case HEAD_AT_GPU:
case SYNCED:
break;
}
#else
NO_GPU;
#endif
}
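// Set the blob shape and element count; the backing memory is reallocated
// only when the new count exceeds the current capacity.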
template <typename Dtype>
void Blob<Dtype>::Reshape(const std::vector<int>& shape) {
count_ = 1;
shape_ = shape;
for (size_t i = 0; i < shape.size(); ++i) {
CHECK(shape[i]);
count_ *= shape[i];
}
if (count_ > capacity_) {
capacity_ = count_;
data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype)));
}
}
template <typename Dtype>
void Blob<Dtype>::ReshapeLike(const Blob<Dtype>& other) {
Reshape(other.shape());
}
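// Copy the data of another blob into this one. The two-argument overload can
// optionally reshape this blob to match the source; otherwise a shape
// mismatch is a fatal error.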
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source) {
CopyFrom(source, false);
}
template <typename Dtype>
void Blob<Dtype>::CopyFrom(const Blob& source, bool reshape) {
  if (shape_ != source.shape_) {  // compare dimension count and extents
if (reshape) {
Reshape(source.shape_);
} else {
LOG(FATAL) << "Trying to copy blobs of different sizes.";
}
}
#ifndef CPU_ONLY
  // Copy on the device; mutable_gpu_data() marks the destination head as GPU.
  CUDA_CHECK(cudaMemcpy(static_cast<Dtype*>(data_->mutable_gpu_data()),
             source.gpu_data(), sizeof(Dtype) * count_, cudaMemcpyDefault));
#else
  // CPU-only build: plain host-to-host copy.
  memcpy(static_cast<Dtype*>(data_->mutable_cpu_data()), source.cpu_data(),
         sizeof(Dtype) * count_);
#endif
}
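// Restore the shape and data of this blob from a BlobProto message.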
template <typename Dtype>
void Blob<Dtype>::FromProto(const singa::BlobProto& proto) {
std::vector<int> shape;
for (int s : proto.shape()) {
shape.push_back(s);
}
int count = count_;
Reshape(shape);
if (count != count_)
LOG(WARNING) << "Blob is reshaped to diff size " << count << ":" << count_;
// copy data
Dtype* data_vec = mutable_cpu_data();
for (int i = 0; i < count_; ++i) {
data_vec[i] = proto.data(i);
}
}
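// Serialize the shape and data of this blob into a BlobProto message.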
template <typename Dtype>
void Blob<Dtype>::ToProto(singa::BlobProto* proto) const {
for (int s : shape_) {
proto->add_shape(s);
}
proto->clear_data();
const Dtype* data_vec = cpu_data();
for (int i = 0; i < count_; ++i) {
proto->add_data(data_vec[i]);
}
}
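// Share the underlying SyncedMemory of another blob instead of copying it;
// the element counts must match.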
template <typename Dtype>
void Blob<Dtype>::ShareData(const Blob& other) {
CHECK_EQ(count_, other.count());
data_ = other.data_;
}
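// Exchange the underlying storage of two blobs with identical shapes.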
template <typename Dtype>
void Blob<Dtype>::Swap(Blob& other) {
CHECK_EQ(other.count(), count());
CHECK(std::equal(shape_.begin(), shape_.end(), other.shape_.begin()));
std::swap(data_, other.data_);
std::swap(capacity_, other.capacity_);
}
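// Note: these specializations return averages, i.e. the (absolute) sum
// divided by the number of elements.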
template <> float Blob<float>::asum_data() const {
if (count() == 0) return 0.f;
return cblas_sasum(count(), cpu_data(), 1) / count();
}
template <> float Blob<float>::sum_data() const {
if (count() == 0) return 0.f;
float sum = 0.f;
const float* dptr = cpu_data();
for (int i = 0; i < count(); ++i)
sum += dptr[i];
return sum / count();
}
template <> unsigned int Blob<unsigned int>::asum_data() const {
NOT_IMPLEMENTED;
return 0;
}
template <> int Blob<int>::asum_data() const {
NOT_IMPLEMENTED;
return 0;
}
INSTANTIATE_CLASS(Blob);
template class Blob<int>;
template class Blob<unsigned int>;
} // namespace singa