blob: 53251383f858e0711578f38f5f611b666d542bc0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#pragma once
#ifndef CUM_SUM_H
#define CUM_SUM_H
using uint = unsigned int;
#include <cuda_runtime.h>
/**
* Do a cumulative summation over all columns of a matrix
* @param g_idata input data stored in device memory (of size n)
* @param g_odata output/temporary array stored in device memory (of size n)
* @param g_tdata temporary accumulated block offsets
* @param rows number of rows in input matrix
* @param cols number of columns in input matrix
* @param block_height number of rows processed per block
*/
/**
* Cumulative sum up-sweep instantiation for double
*/
extern "C" __global__ void cumulative_sum_up_sweep_d(double *g_idata, double *g_tdata, uint rows, uint cols,
        uint block_height)
{
    // Operator functor forwarded to the generic scan (SumOp is declared outside this file).
    using AddOp = SumOp<double>;
    AddOp add_op;

    // Delegate all work to the shared up-sweep template, instantiated for double.
    cumulative_scan_up_sweep<AddOp, double>(g_idata, g_tdata, rows, cols, block_height, add_op);
}
/**
* Cumulative sum up-sweep instantiation for float
*/
extern "C" __global__ void cumulative_sum_up_sweep_f(float *g_idata, float *g_tdata, uint rows, uint cols,
        uint block_height)
{
    // Operator functor forwarded to the generic scan (SumOp is declared outside this file).
    using AddOp = SumOp<float>;
    AddOp add_op;

    // Delegate all work to the shared up-sweep template, instantiated for float.
    cumulative_scan_up_sweep<AddOp, float>(g_idata, g_tdata, rows, cols, block_height, add_op);
}
/**
* Cumulative sum down-sweep instantiation for double
*/
extern "C" __global__ void cumulative_sum_down_sweep_d(double *g_idata, double *g_odata, double *g_tdata,
        uint rows, uint cols, uint block_height)
{
    // Operator and neutral-element types handed to the generic scan
    // (both are declared outside this file).
    using AddOp = SumOp<double>;
    using AddIdentity = SumNeutralElement<double>;
    AddOp add_op;

    // Delegate all work to the shared down-sweep template, instantiated for double.
    cumulative_scan_down_sweep<AddOp, AddIdentity, double>(g_idata, g_odata, g_tdata, rows, cols,
            block_height, add_op);
}
/**
* Cumulative sum down-sweep instantiation for float
*/
extern "C" __global__ void cumulative_sum_down_sweep_f(float *g_idata, float *g_odata, float *g_tdata,
        uint rows, uint cols, uint block_height)
{
    // Operator and neutral-element types handed to the generic scan
    // (both are declared outside this file).
    using AddOp = SumOp<float>;
    using AddIdentity = SumNeutralElement<float>;
    AddOp add_op;

    // Delegate all work to the shared down-sweep template, instantiated for float.
    cumulative_scan_down_sweep<AddOp, AddIdentity, float>(g_idata, g_odata, g_tdata, rows, cols,
            block_height, add_op);
}
#endif // CUM_SUM_H