blob: 36299c777d8967b85001d752da4c7c9efa6b615c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "SpoofCUDAContext.h"
#include <filesystem>
#include <iostream>
#include <cstdlib>
#include <sstream>
size_t SpoofCUDAContext::initialize_cuda(uint32_t device_id, const char* resource_path) {
#ifdef __DEBUG
std::cout << "initializing cuda device " << device_id << std::endl;
#endif
SpoofCUDAContext *ctx = new SpoofCUDAContext(resource_path);
// cuda device is handled by jCuda atm
//cudaSetDevice(device_id);
//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
//cudaDeviceSynchronize();
CHECK_CUDA(cuModuleLoad(&(ctx->reductions), std::string(ctx->resource_path + std::string("/cuda/kernels/reduction.ptx")).c_str()));
CUfunction func;
// ToDo: implement a more scalable solution for these imports
// SUM
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_sum_d"));
ctx->reduction_kernels.insert(std::make_pair("reduce_sum_d", func));
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_sum_f"));
ctx->reduction_kernels.insert(std::make_pair("reduce_sum_f", func));
// SUM_SQ
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_sum_sq_d"));
ctx->reduction_kernels.insert(std::make_pair("reduce_sum_sq_d", func));
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_sum_sq_f"));
ctx->reduction_kernels.insert(std::make_pair("reduce_sum_sq_f", func));
// MIN
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_min_d"));
ctx->reduction_kernels.insert(std::make_pair("reduce_min_d", func));
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_min_f"));
ctx->reduction_kernels.insert(std::make_pair("reduce_min_f", func));
// MAX
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_max_d"));
ctx->reduction_kernels.insert(std::make_pair("reduce_max_d", func));
CHECK_CUDA(cuModuleGetFunction(&func, ctx->reductions, "reduce_max_f"));
ctx->reduction_kernels.insert(std::make_pair("reduce_max_f", func));
return reinterpret_cast<size_t>(ctx);
}
void SpoofCUDAContext::destroy_cuda(SpoofCUDAContext *ctx, uint32_t device_id) {
delete ctx;
ctx = nullptr;
// cuda device is handled by jCuda atm
//cudaDeviceReset();
}
bool SpoofCUDAContext::compile_cuda(const std::string &src,
const std::string &name) {
std::string cuda_include_path("");
char* cdp = std::getenv("CUDA_PATH");
if(cdp != nullptr)
cuda_include_path = std::string("-I") + std::string(cdp) + "/include";
else {
std::cout << "Warning: CUDA_PATH environment variable not set. Using default include path"
"/usr/local/cuda/include" << std::endl;
cuda_include_path = std::string("-I/usr/local/cuda/include");
}
#ifdef __DEBUG
std::cout << "compiling cuda kernel " << name << std::endl;
std::cout << src << std::endl;
std::cout << "cwd: " << std::filesystem::current_path() << std::endl;
std::cout << "cuda_path: " << cuda_include_path << std::endl;
#endif
SpoofOperator::AggType type = SpoofOperator::AggType::NONE;
SpoofOperator::AggOp op = SpoofOperator::AggOp::NONE;
auto pos = 0;
if((pos = src.find("CellType")) != std::string::npos) {
if(src.substr(pos, pos+30).find("FULL_AGG") != std::string::npos)
type = SpoofOperator::AggType::FULL_AGG;
else if(src.substr(pos, pos+30).find("ROW_AGG") != std::string::npos)
type = SpoofOperator::AggType::ROW_AGG;
else if(src.substr(pos, pos+30).find("COL_AGG") != std::string::npos)
type = SpoofOperator::AggType::COL_AGG;
else if(src.substr(pos, pos+30).find("NO_AGG") != std::string::npos)
type = SpoofOperator::AggType::NO_AGG;
else {
std::cerr << "error: unknown aggregation type" << std::endl;
return false;
}
if(type != SpoofOperator::AggType::NO_AGG) {
if((pos = src.find("AggOp")) != std::string::npos) {
if(src.substr(pos, pos+30).find("AggOp.SUM") != std::string::npos)
op = SpoofOperator::AggOp::SUM;
else if(src.substr(pos, pos+30).find("AggOp.SUM_SQ") != std::string::npos)
op = SpoofOperator::AggOp::SUM_SQ;
else if(src.substr(pos, pos+30).find("AggOp.MIN") != std::string::npos)
op = SpoofOperator::AggOp::MIN;
else if(src.substr(pos, pos+30).find("AggOp.MAX") != std::string::npos)
op = SpoofOperator::AggOp::MAX;
else {
std::cerr << "error: unknown aggregation operator" << std::endl;
return false;
}
}
}
}
std::stringstream s1, s2, s3;
s1 << "-I" << resource_path << "/cuda/headers";
s2 << "-I" << resource_path << "/cuda/spoof";
jitify::Program program = kernel_cache.program(src, 0, {s1.str(), s2.str(), cuda_include_path});
ops.insert(std::make_pair(name, SpoofOperator({std::move(program), type, op})));
return true;
}