/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.matrix.data;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.Callable;
import org.apache.sysds.hops.OptimizerUtils;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.matrix.data.LibMatrixDNNRotate180.Rotate180Worker;
import org.apache.sysds.utils.NativeHelper;
import org.apache.sysds.utils.Statistics;
/**
* This class contains the set of operators used for performing conv2d
*/
public class LibMatrixDNNConv2d
{
/**
* Factory method that returns list of callable tasks for performing conv2d
*
* @param params convolution parameters
* @return list of callable tasks for performing conv2d
*/
public static ArrayList<Callable<Long>> getConv2dWorkers(DnnParameters params) {
ArrayList<Callable<Long>> ret = new ArrayList<>();
// Try to create twice as many tasks as threads for improved load balance
// (due to constant-sized intermediates, GC works well, so the overhead per task is small)
int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
int taskSize = (int)(Math.ceil((double)params.N / k / 2));
MatrixBlock in1 = params.input1;
boolean isEmptyDenseInput = !in1.isInSparseFormat() && in1.denseBlock == null;
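//prefer a transpose-based conv2d if the input is sparse but filter and output are
//dense, and the transposed input (note the swapped dims) would still be sparse in memory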
boolean isTransPref = in1.sparse && !params.input2.sparse && !params.output.sparse &&
MatrixBlock.evalSparseFormatInMemory(in1.clen, in1.rlen, in1.nonZeros);
boolean applyNative = isEligibleForConv2dSparse(params)
&& !(!isEmptyDenseInput && isTransPref);
if( applyNative )
Statistics.numNativeSparseConv2dCalls.increment();
//transpose filter once for efficient sparse-dense multiplies in LoopedIm2ColConv2dTransAllChan
//in order to share the temporary object and its creation costs across threads
if( !applyNative && !isEmptyDenseInput && isTransPref ) {
params.input2 = LibMatrixReorg.transpose(params.input2,
new MatrixBlock(params.input2.clen, params.input2.rlen, false), k);
}
for(int i = 0; i*taskSize < params.N; i++) {
//note: we prefer the java backend for sparse inputs because the native
//implementation simply converts the sparse input into dense rows
if( applyNative )
ret.add(new SparseNativeConv2d(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else if(!isEmptyDenseInput && isTransPref)
ret.add(new LoopedIm2ColConv2dTransAllChan(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else if(!isEmptyDenseInput)
ret.add(new LoopedIm2ColConv2dAllChan(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else
throw new DMLRuntimeException("Unsupported operator");
}
return ret;
}
/**
* Factory method that returns list of callable tasks for performing conv2d backward filter
*
* @param params convolution parameters
* @return list of callable tasks for performing conv2d backward filter
*/
public static ArrayList<Callable<Long>> getConv2dBackwardFilterWorkers(DnnParameters params) {
ArrayList<Callable<Long>> ret = new ArrayList<>();
// Try to create as many tasks as threads.
// Creating more tasks would help with the tail of the computation, but would add
// the overhead of maintaining intermediate data structures such as im2col blocks.
int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
int taskSize = (int)(Math.ceil((double)params.N / k));
boolean isEmptyDenseInput = (!params.input1.isInSparseFormat() && params.input1.denseBlock == null) ||
(!params.input2.isInSparseFormat() && params.input2.denseBlock == null);
boolean applyNative = isEligibleForConv2dBackwardFilterSparseDense(params)
&& !params.input2.isInSparseFormat();
if( applyNative )
Statistics.numNativeSparseConv2dBwdFilterCalls.increment();
for(int i = 0; i*taskSize < params.N; i++) {
//note: we prefer the java backend for sparse dout because the native
//implementation simply rotates the sparse dout into dense rows
if( applyNative )
ret.add(new SparseNativeConv2dBackwardFilterDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else if( params.input2.sparse && params.input1.getSparsity() > params.input2.getSparsity() )
ret.add(new Conv2dBackwardFilterTrans(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else if(!isEmptyDenseInput)
ret.add(new Conv2dBackwardFilter(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else
throw new DMLRuntimeException("Unsupported operator");
}
return ret;
}
/**
* Factory method that returns list of callable tasks for performing conv2d backward data
*
* @param params convolution parameters
* @return list of callable tasks for performing conv2d backward data
*/
public static ArrayList<Callable<Long>> getConv2dBackwardDataWorkers(DnnParameters params) {
ArrayList<Callable<Long>> ret = new ArrayList<>();
// Try to create as many tasks as threads.
// Creating more tasks would help with the tail of the computation, but would add
// the overhead of maintaining intermediate data structures such as im2col blocks.
int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
int taskSize = (int)(Math.ceil((double)params.N / k));
boolean isEmptyDenseInput = (!params.input1.isInSparseFormat() && params.input1.denseBlock == null) ||
(!params.input2.isInSparseFormat() && params.input2.denseBlock == null);
boolean applyNative = isEligibleForConv2dBackwardDataDense(params)
&& !params.input2.isInSparseFormat();
if( applyNative )
Statistics.numNativeSparseConv2dBwdDataCalls.increment();
for(int i = 0; i*taskSize < params.N; i++) {
//note: we prefer the java backend for sparse dout because the native
//implementation simply converts the sparse dout into dense rows
if( applyNative )
ret.add(new SparseNativeConv2dBackwardDataDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else if(!isEmptyDenseInput)
ret.add(new Conv2dBackwardData(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
else
throw new DMLRuntimeException("Unsupported operator");
}
return ret;
}
/**
* Performs convolution via: partialCopy1(filter %*% im2col(input)) = output
*/
private static class LoopedIm2ColConv2dAllChan implements Callable<Long>
{
protected final int _rl, _ru;
protected final DnnParameters _params;
public LoopedIm2ColConv2dAllChan(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
final int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
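//allocate im2col and matmult intermediates once per task and reuse them across images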
MatrixBlock outIm2col = new MatrixBlock(CRS, PQ, _params.input1.sparse).allocateBlock();
LibMatrixDNNIm2Col.preallocateSparseOutput(_params.input1, outIm2col);
MatrixBlock outMM = new MatrixBlock(K, PQ, _params.output.sparse);
for(int n = _rl; n < _ru; n++) {
LibMatrixDNNIm2Col.im2col(_params.input1, outIm2col, n, _params, false);
// filter %*% outIm2col => outMM
outMM.reset(outMM.rlen, outMM.clen, _params.output.sparse);
LibMatrixDNNHelper.singleThreadedMatMult(_params.input2, outIm2col, outMM, false, true, _params);
// Copy the outMM of shape [K x PQ] to a row in params.output
partialCopy1(outMM, _params.output, n, K, PQ);
// Add bias to current row if necessary, always dense
if(_params.bias != null)
addBias(n, _params.output.getDenseBlockValues(),
_params.bias.getDenseBlockValues(), K, PQ);
}
//multi-threaded nnz maintenance of current working set
return _params.output.recomputeNonZeros(_rl, _ru-1);
}
// Copy the matrix src of shape [K X PQ] to the r-th row in params.output
private static void partialCopy1(MatrixBlock src, MatrixBlock dest, int r, int K, int PQ) {
// Copying is required as LibMatrixMult.matrixMult (and/or Java) is not pointer aware.
if( src.isEmptyBlock() )
return;
if( src.sparse ) { //* <- SPARSE
SparseBlock srcBlock = src.sparseBlock;
SparseBlock sdestBlock = dest.sparseBlock;
double[] ddestBlock = dest.getDenseBlockValues();
for(int k = 0; k < src.getNumRows(); k++) {
if( srcBlock.isEmpty(k) ) continue;
int apos = srcBlock.pos(k);
int alen = srcBlock.size(k);
int[] aix = srcBlock.indexes(k);
double[] avals = srcBlock.values(k);
if( dest.sparse ) {
//append row k of src into columns [k*PQ, (k+1)*PQ) of row r
//(aix indexes are relative to row k of the [K x PQ] src)
for(int j = apos; j < apos+alen; j++)
sdestBlock.append(r, k*PQ+aix[j], avals[j]);
}
else {
//row r starts at offset r*K*PQ in the dense destination
int desPosK = r*K*PQ + k*PQ;
for(int j = apos; j < apos+alen; j++)
ddestBlock[desPosK+aix[j]] = avals[j];
}
}
}
else { //* <- DENSE
if( dest.sparse ) {
dest.getSparseBlock().setIndexRange(r, 0, K*PQ,
src.getDenseBlockValues(), 0, K*PQ);
}
else {
System.arraycopy(src.getDenseBlockValues(), 0,
dest.getDenseBlockValues(), r*K*PQ, K*PQ);
}
}
}
}
/**
* This implementation is similar to LoopedIm2ColConv2dAllChan, except that it uses a
* sparse-dense matrix multiplication t(t(Xi) %*% t(F)) instead of a
* dense-sparse matrix multiplication F %*% Xi, where Xi = im2col(X[i,]).
*
* NOTE: this implementation assumes that the filter is passed in transposed form
* in order to share this temporary matrix (and its creation cost) across threads.
*/
private static class LoopedIm2ColConv2dTransAllChan extends LoopedIm2ColConv2dAllChan
{
public LoopedIm2ColConv2dTransAllChan(int rl, int ru, DnnParameters params) {
super(rl, ru, params);
}
@Override
public Long call() throws Exception {
final int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
MatrixBlock outIm2col = new MatrixBlock(PQ, CRS, _params.input1.sparse).allocateBlock();
LibMatrixDNNIm2Col.preallocateSparseOutput(_params.input1, outIm2col);
MatrixBlock outMM = new MatrixBlock(PQ, K, false);
for(int n = _rl; n < _ru; n++) {
LibMatrixDNNIm2Col.im2col(_params.input1, outIm2col, n, _params, true);
// outIm2col (t(im2col)) %*% t(filter) => outMM (transposed result of shape [PQ x K])
outMM.reset(outMM.rlen, outMM.clen, false);
LibMatrixDNNHelper.singleThreadedMatMult(outIm2col, _params.input2, outMM, false, false, _params);
// Transpose-copy outMM of shape [PQ x K] into row n of params.output ([1 x K*PQ])
partialCopyTrans(outMM, _params.output, n*K*PQ, K, PQ);
// Add bias to current row if necessary, always dense
if(_params.bias != null)
addBias(n, _params.output.getDenseBlockValues(),
_params.bias.getDenseBlockValues(), K, PQ);
}
//multi-threaded nnz maintenance of current working set
return _params.output.recomputeNonZeros(_rl, _ru-1);
}
private static void partialCopyTrans(MatrixBlock src, MatrixBlock dest, int destPos, int K, int PQ) {
if( src.isEmptyBlock() )
return;
//copy src into its destination row w/ piggybacked transpose
//src is [PQ x K] -> [K x PQ] -> [1 x KPQ]
if(src.isInSparseFormat()) {
SparseBlock sblock = src.sparseBlock;
double[] c = dest.getDenseBlockValues();
for(int i = 0; i < src.getNumRows(); i++) {
if( sblock.isEmpty(i) ) continue;
int apos = sblock.pos(i);
int alen = sblock.size(i);
int[] aix = sblock.indexes(i);
double[] avals = sblock.values(i);
int desPosK = destPos + i;
for(int j = apos; j < apos+alen; j++)
c[desPosK+aix[j]*PQ] = avals[j];
}
}
else {
double[] a = src.getDenseBlockValues();
double[] c = dest.getDenseBlockValues();
final int blocksizeIJ = 128; //128KB for L2
//cache-conscious blocked execution
for( int bi = 0; bi < PQ; bi+=blocksizeIJ )
for( int bj = 0; bj < K; bj+=blocksizeIJ ) {
int bimin = Math.min(bi+blocksizeIJ, PQ);
int bjmin = Math.min(bj+blocksizeIJ, K);
//core transpose operation
for(int i=bi, aix=bi*K+bj, cix=bj*PQ+bi; i<bimin; i++, aix+=K, cix++)
LibMatrixReorg.transposeRow(a, c, aix, destPos+cix, PQ, bjmin-bj);
}
}
}
}
/**
* This operator is used only if native is enabled, filter is dense and input is sparse
*/
private static class SparseNativeConv2d implements Callable<Long>
{
public final int _rl, _ru;
private final DnnParameters _params;
public SparseNativeConv2d(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
int KPQ = _params.K*_params.P*_params.Q;
double[] temp = new double[KPQ];
for(int n = _rl; n < _ru; n++) {
if( !_params.input1.getSparseBlock().isEmpty(n) ) {
int apos = _params.input1.getSparseBlock().pos(n);
int alen = _params.input1.getSparseBlock().size(n);
int[] aix = _params.input1.getSparseBlock().indexes(n);
double[] avals = _params.input1.getSparseBlock().values(n);
NativeHelper.conv2dSparse(apos, alen, aix, avals, _params.input2.getDenseBlockValues(), temp,
1, _params.C, _params.H, _params.W, _params.K, _params.R, _params.S,
_params.stride_h, _params.stride_w, _params.pad_h, _params.pad_w, _params.P, _params.Q, 1);
System.arraycopy(temp, 0, _params.output.getDenseBlockValues(), n*KPQ, KPQ);
}
}
//multi-threaded nnz maintenance of current working set
return _params.output.recomputeNonZeros(_rl, _ru-1);
}
}
// BACKWARD DATA
/**
* This operator is used only if native is enabled and the filter is dense.
* dout is converted into dense if sparse.
*/
private static class SparseNativeConv2dBackwardDataDense implements Callable<Long>
{
public final int _rl, _ru;
private final DnnParameters _params;
public SparseNativeConv2dBackwardDataDense(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
int CHW = _params.C*_params.H*_params.W;
double [] ret = new double[CHW];
double [] filterArr = _params.input1.getDenseBlockValues();
double [] dout_n = new double[_params.P*_params.Q*_params.K];
for(int n = _rl; n < _ru; n++) {
getRowInDenseFormat(_params.input2, n, dout_n);
if(n > _rl)
Arrays.fill(ret, 0);
NativeHelper.conv2dBackwardDataDense(filterArr, dout_n, ret, 1,
_params.C, _params.H, _params.W, _params.K,
_params.R, _params.S, _params.stride_h, _params.stride_w, _params.pad_h, _params.pad_w, _params.P, _params.Q, 1);
System.arraycopy(ret, 0, _params.output.getDenseBlockValues(), n*CHW, CHW);
}
//multi-threaded nnz maintenance of current working set
return _params.output.recomputeNonZeros(_rl, _ru-1);
}
}
/**
* General conv2d backward data operator
*/
private static class Conv2dBackwardData implements Callable<Long> {
public final int _rl, _ru;
private final DnnParameters _params;
public Conv2dBackwardData(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
MatrixBlock filter = _params.input1;
MatrixBlock dout = _params.input2;
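//allocate rotate180 and matmult intermediates once per task and reuse them across images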
MatrixBlock outRotate = new MatrixBlock(PQ, K, dout.sparse);
MatrixBlock outMM = new MatrixBlock(PQ, CRS, false);
outRotate.allocateBlock();
LibMatrixDNNRotate180.Rotate180Worker rotate180Worker =
LibMatrixDNNRotate180.Rotate180Worker.getWorker( dout, outRotate, _params, true, false);
for(int n = _rl; n < _ru; n++) {
// rotate180(dout[n,]) => outRotate
rotate180Worker.execute(n, 0);
// outRotate %*% filter => outMM
outMM.reset(PQ, CRS, false);
LibMatrixDNNHelper.singleThreadedMatMult(outRotate, filter, outMM, !outRotate.sparse, false, _params);
// col2im(outMM) => output[n,]
LibMatrixDNNIm2Col.col2imOverSingleImage(n, outMM, _params);
}
//multi-threaded nnz maintenance of current working set
return _params.output.recomputeNonZeros(_rl, _ru-1);
}
}
//BACKWARD FILTER
/**
* This operator is used only if native is enabled and input is sparse.
* dout is converted into dense if sparse.
*/
private static class SparseNativeConv2dBackwardFilterDense implements Callable<Long>
{
public final int _rl, _ru;
private final DnnParameters _params;
public SparseNativeConv2dBackwardFilterDense(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
int CRS = _params.C*_params.R*_params.S, PQ = _params.P*_params.Q, K = _params.K;
MatrixBlock dout_n = new MatrixBlock(PQ, K, false);
dout_n.allocateBlock();
LibMatrixDNNRotate180.Rotate180Worker rotate180Worker =
LibMatrixDNNRotate180.Rotate180Worker.getWorker( _params.input2, dout_n, _params, true, false);
double [] ldout_n = dout_n.getDenseBlockValues();
double [] partRet = new double[CRS*_params.K]; //CRS x K
for(int n = _rl; n < _ru; n++) {
if( !_params.input1.getSparseBlock().isEmpty(n) ) {
// rotate180(dout[n,]) => dout_n
rotate180Worker.execute(n, 0);
int apos = _params.input1.getSparseBlock().pos(n);
int alen = _params.input1.getSparseBlock().size(n);
int[] aix = _params.input1.getSparseBlock().indexes(n);
double[] avals = _params.input1.getSparseBlock().values(n);
NativeHelper.conv2dBackwardFilterSparseDense(apos, alen, aix, avals,
ldout_n, partRet, 1, _params.C, _params.H, _params.W, _params.K,
_params.R, _params.S, _params.stride_h, _params.stride_w, _params.pad_h, _params.pad_w, _params.P, _params.Q, 1);
}
}
inplaceTransAdd(partRet, _params);
return 0L;
}
}
/**
* General conv2d backward filter operator
*/
private static class Conv2dBackwardFilter implements Callable<Long> {
private final int _rl, _ru;
private final DnnParameters _params;
public Conv2dBackwardFilter(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
MatrixBlock dout = _params.input2;
MatrixBlock im2ColOutBlock = new MatrixBlock(CRS, PQ, _params.input1.sparse).allocateBlock();
LibMatrixDNNIm2Col.preallocateSparseOutput(_params.input1, im2ColOutBlock);
MatrixBlock outRotate = new MatrixBlock(PQ, K, dout.sparse);
MatrixBlock outMM = new MatrixBlock(CRS, K, false);
outRotate.allocateBlock();
Rotate180Worker rotate180Worker = Rotate180Worker.getWorker( dout, outRotate, _params, true, false);
double [] partRet = new double[CRS*_params.K];
for(int n = _rl; n < _ru; n++) {
// rotate180(dout[n,]) => outRotate
rotate180Worker.execute(n, 0);
// im2col(input) => im2ColOutBlock
LibMatrixDNNIm2Col.im2col(_params.input1, im2ColOutBlock, n, _params, false);
outMM.reset(CRS, K, false);
LibMatrixDNNHelper.singleThreadedMatMult(im2ColOutBlock, outRotate, outMM, !im2ColOutBlock.sparse, !outRotate.sparse, _params);
if( !outMM.isEmptyBlock() ) //accumulate row results
LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
}
inplaceTransAdd(partRet, _params);
return 0L;
}
}
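/**
* Transposed variant of Conv2dBackwardFilter that computes the [K x CRS]
* result directly via rotate180(dout[n,]) %*% im2col(input), avoiding the
* final transpose; preferred if dout is sparse and the input is denser than dout.
*/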
private static class Conv2dBackwardFilterTrans implements Callable<Long> {
private final int _rl, _ru;
private final DnnParameters _params;
public Conv2dBackwardFilterTrans(int rl, int ru, DnnParameters params) {
_rl = rl; _ru = ru;
_params = params;
}
@Override
public Long call() throws Exception {
int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
MatrixBlock dout = _params.input2;
MatrixBlock im2ColOutBlock = new MatrixBlock(PQ, CRS, _params.input1.sparse).allocateBlock();
LibMatrixDNNIm2Col.preallocateSparseOutput(_params.input1, im2ColOutBlock);
MatrixBlock outRotate = new MatrixBlock(K, PQ, dout.sparse).allocateBlock();
MatrixBlock outMM = new MatrixBlock(K, CRS, false).allocateBlock();
Rotate180Worker rotate180Worker = Rotate180Worker.getWorker( dout, outRotate, _params, true, true);
double [] partRet = new double[CRS*_params.K];
for(int n = _rl; n < _ru; n++) {
// rotate180(dout[n,]) => outRotate
rotate180Worker.execute(n, 0);
// im2col(input) => im2ColOutBlock
LibMatrixDNNIm2Col.im2col(_params.input1, im2ColOutBlock, n, _params, true);
outMM.reset(K, CRS, false);
LibMatrixDNNHelper.singleThreadedMatMult(outRotate, im2ColOutBlock,
outMM, !outRotate.sparse, !im2ColOutBlock.sparse, _params);
if( !outMM.isEmptyBlock() ) //accumulate row results
LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
}
//no need to transpose because t(t(out)) cancels out
inplaceAdd(partRet, _params);
return 0L;
}
}
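//thread-safe accumulation of a partial [K x CRS] result into the dense output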
private static void inplaceAdd(double[] a, DnnParameters params) {
synchronized (params.output.denseBlock) {
LibMatrixMult.vectAdd(a, params.output.getDenseBlockValues(), 0, 0, a.length);
}
}
private static void inplaceTransAdd(double[] a, DnnParameters params) {
synchronized (params.output.denseBlock) {
// Perform transposed addition: output of size [K, CRS] += input of size [CRS,K]
double [] c = params.output.getDenseBlockValues();
final int CRS = params.C*params.R*params.S, K = params.K;
final int blocksizeIJ = 128; //L2 cache
//cache-conscious blocked execution
for( int bi=0; bi<CRS; bi+=blocksizeIJ )
for( int bj=0; bj<K; bj+=blocksizeIJ ) {
int bimin = Math.min(bi+blocksizeIJ, CRS);
int bjmin = Math.min(bj+blocksizeIJ, K);
//core transpose add operation
for(int i=bi, aix=bi*K; i<bimin; i++, aix+=K)
for(int j=bj, cix=i+bj*CRS; j<bjmin; j++, cix+=CRS)
c[cix] += a[aix+j];
}
}
}
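//copies row n of the given matrix into the preallocated dense array ret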
private static void getRowInDenseFormat(MatrixBlock input, int n, double [] ret) {
if(input.getNumColumns() != ret.length) {
throw new DMLRuntimeException("Invalid parameters");
}
// Use temporary array to avoid binary search
if(input.isInSparseFormat()) {
Arrays.fill(ret, 0);
if( !input.sparseBlock.isEmpty(n) ) {
int apos = input.sparseBlock.pos(n);
int alen = input.sparseBlock.size(n);
int[] aix = input.sparseBlock.indexes(n);
double[] avals = input.sparseBlock.values(n);
for(int j=apos; j<apos+alen; j++)
ret[ aix[j] ] = avals[j];
}
}
else {
System.arraycopy(input.getDenseBlockValues(),
n*input.getNumColumns(), ret, 0, input.getNumColumns());
}
}
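//adds bias[k] to all PQ values of channel k in row r of the dense output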
private static void addBias(int r, double [] out, double [] bias, int K, int PQ) {
for(int k=0, cix=r*K*PQ; k<K; k++, cix+=PQ)
LibMatrixMult.vectAddInPlace(bias[k], out, cix, PQ);
}
// ----------------------------------------------------------------------------------------------
// TODO: Support sparse native convolution operations without dense intermediates + dense matmult
// Currently, execution falls back to the more optimized sparse Java-based operators.
private static boolean isEligibleForConv2dBackwardFilterSparseDense(DnnParameters params) {
// NativeHelper.conv2dBackwardFilterSparseDense only if input is sparse.
// dout converted to dense if sparse.
// return params.enableNative && params.input1.isInSparseFormat();
return false;
}
private static boolean isEligibleForConv2dSparse(DnnParameters params) {
// NativeHelper.conv2dSparse only if filter is dense and input is sparse
// return params.enableNative && params.input1.isInSparseFormat() && !params.input2.isInSparseFormat();
return false;
}
private static boolean isEligibleForConv2dBackwardDataDense(DnnParameters params) {
// NativeHelper.conv2dBackwardDataDense only if filter is dense.
// dout converted to dense if sparse.
// return params.enableNative && !params.input1.isInSparseFormat();
return false;
}
// ----------------------------------------------------------------------------------------------
}