src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java - systemds - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.sysml.runtime.instructions.cp;

 import java.util.ArrayList;

 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.functionobjects.SwapIndex;
 import org.apache.sysml.runtime.instructions.InstructionUtils;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
 import org.apache.sysml.utils.Statistics;

 public class ConvolutionCPInstruction extends UnaryCPInstruction {

 	private CPOperand _in2; // used for pooling backward
 	private ArrayList<CPOperand> _input_shape;
 	private ArrayList<CPOperand> _filter_shape;
 	private ArrayList<CPOperand> _stride = new ArrayList<CPOperand>();
 	private ArrayList<CPOperand> _padding = new ArrayList<CPOperand>();
 	private boolean _reuseNonZeroedOutput = false;
 	private int _numThreads = -1;
 	public ConvolutionCPInstruction(CPOperand in, CPOperand out, String opcode,
 			String istr, ArrayList<CPOperand> stride,
 			ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape,
 			ArrayList<CPOperand> filter_shape, int numThreads) {
 		super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), in, out,
 				opcode, istr);
 		_cptype = CPINSTRUCTION_TYPE.Convolution;
 		_stride = stride;
 		_padding = padding;
 		_input_shape = input_shape;
 		_filter_shape = filter_shape;
 		_numThreads = numThreads;
 	}

 	public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand out, String opcode,
 			String istr, ArrayList<CPOperand> stride,
 			ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape,
 			ArrayList<CPOperand> filter_shape, int numThreads) {
 		super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), in, out,
 				opcode, istr);
 		_in2 = in2;
 		_cptype = CPINSTRUCTION_TYPE.Convolution;
 		_stride = stride;
 		_padding = padding;
 		_input_shape = input_shape;
 		_filter_shape = filter_shape;
 		_numThreads = numThreads;
 	}

 	public static ConvolutionCPInstruction parseInstruction(String str)
 			throws DMLRuntimeException {
 		CPOperand in = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
 		CPOperand out = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);

 		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
 		String opcode = parts[0];
 		if (opcode.equalsIgnoreCase("maxpooling")) {
 			InstructionUtils.checkNumFields(parts, 15);
 			// stride1, stride2, padding1, padding2
 			// input_shape1, input_shape2, input_shape3, input_shape4,
 			// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
 			in.split(parts[1]);
 			out.split(parts[14]);

 			ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
 			ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
 			ArrayList<CPOperand> input_shape = new ArrayList<CPOperand>();
 			ArrayList<CPOperand> filter_shape = new ArrayList<CPOperand>();
 			stride.add(new CPOperand(parts[2]));
 			stride.add(new CPOperand(parts[3]));
 			padding.add(new CPOperand(parts[4]));
 			padding.add(new CPOperand(parts[5]));
 			input_shape.add(new CPOperand(parts[6]));
 			input_shape.add(new CPOperand(parts[7]));
 			input_shape.add(new CPOperand(parts[8]));
 			input_shape.add(new CPOperand(parts[9]));
 			filter_shape.add(new CPOperand(parts[10]));
 			filter_shape.add(new CPOperand(parts[11]));
 			filter_shape.add(new CPOperand(parts[12]));
 			filter_shape.add(new CPOperand(parts[13]));
 			int k = Integer.parseInt(parts[15]);

 			return new ConvolutionCPInstruction(in, out, opcode, str, stride,
 					padding, input_shape, filter_shape, k);
 		}
 		else if (opcode.equalsIgnoreCase("maxpooling_backward")
 				|| opcode.equalsIgnoreCase("conv2d")
 				|| opcode.equalsIgnoreCase("conv2d_backward_filter")
 				|| opcode.equalsIgnoreCase("conv2d_backward_data")) {
 			InstructionUtils.checkNumFields(parts, 16);
 			// dout, stride1, stride2, padding1, padding2
 			// input_shape1, input_shape2, input_shape3, input_shape4,
 			// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
 			in.split(parts[1]);
 			CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
 			in2.split(parts[2]);
 			out.split(parts[15]);

 			ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
 			ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
 			ArrayList<CPOperand> input_shape = new ArrayList<CPOperand>();
 			ArrayList<CPOperand> filter_shape = new ArrayList<CPOperand>();
 			stride.add(new CPOperand(parts[3]));
 			stride.add(new CPOperand(parts[4]));
 			padding.add(new CPOperand(parts[5]));
 			padding.add(new CPOperand(parts[6]));
 			input_shape.add(new CPOperand(parts[7]));
 			input_shape.add(new CPOperand(parts[8]));
 			input_shape.add(new CPOperand(parts[9]));
 			input_shape.add(new CPOperand(parts[10]));
 			filter_shape.add(new CPOperand(parts[11]));
 			filter_shape.add(new CPOperand(parts[12]));
 			filter_shape.add(new CPOperand(parts[13]));
 			filter_shape.add(new CPOperand(parts[14]));
 			int k = Integer.parseInt(parts[16]);

 			return new ConvolutionCPInstruction(in, in2, out, opcode, str, stride,
 					padding, input_shape, filter_shape, k);
 		}
 		else {
 			throw new DMLRuntimeException("Unknown opcode while parsing a ConvolutionCPInstruction: " + str);
 		}
 	}

 	private int getScalarInput(ExecutionContext ec, ArrayList<CPOperand> aL,
 			int index) throws DMLRuntimeException {
 		return (int) ec.getScalarInput(aL.get(index).getName(),
 				aL.get(index).getValueType(), aL.get(index).isLiteral())
 				.getLongValue();
 	}

 	@Override
 	public void processInstruction(ExecutionContext ec)
 			throws DMLRuntimeException {
 		// acquire inputs
 		MatrixBlock outputBlock = null;
 		MatrixBlock matBlock = ec.getMatrixInput(input1.getName());
 		int pad_h = getScalarInput(ec, _padding, 0);
 		int pad_w = getScalarInput(ec, _padding, 1);
 		int stride_h = getScalarInput(ec, _stride, 0);
 		int stride_w = getScalarInput(ec, _stride, 1);

 		int N = getScalarInput(ec, _input_shape, 0);
 		int C = getScalarInput(ec, _input_shape, 1);
 		int H = getScalarInput(ec, _input_shape, 2);
 		int W = getScalarInput(ec, _input_shape, 3);

 		int K = getScalarInput(ec, _filter_shape, 0);

 		int R = getScalarInput(ec, _filter_shape, 2);
 		int S = getScalarInput(ec, _filter_shape, 3);
 		int P = (int) ConvolutionUtils.getP(H, R, stride_h, pad_h);
 		int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);

 		ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads);
 		if (instOpcode.equalsIgnoreCase("maxpooling")) {
 			if(matBlock.isEmptyBlock()) {
 				outputBlock = new MatrixBlock(N, C*P*Q, true, 0);
 			}
 			else {
 				// Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed
 				// without somewhat expensive HashMap checks
 				outputBlock = getDenseOutputBlock(ec, N, C*P*Q, true);
 				params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
 			}
 			LibMatrixDNN.maxpooling(matBlock, outputBlock, params);
 		}
 		else if (instOpcode.equalsIgnoreCase("maxpooling_backward")) {
 			MatrixBlock dout = ec.getMatrixInput(_in2.getName());
 			if(matBlock.isEmptyBlock() || dout.isEmptyBlock()) {
 				outputBlock = new MatrixBlock(N, C*H*W, true, 0);
 			}
 			else {
 				// Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed
 				// without somewhat expensive HashMap checks
 				outputBlock = getDenseOutputBlock(ec, N, C*H*W, false);
 				params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
 			}
 			LibMatrixDNN.maxpooling_backward(matBlock, dout, outputBlock, params);
 			ec.releaseMatrixInput(_in2.getName());
 		}
 		else if (instOpcode.equalsIgnoreCase("conv2d")) {
 			MatrixBlock filter = ec.getMatrixInput(_in2.getName());
 			if(filter.isEmptyBlock() || matBlock.isEmptyBlock()) {
 				outputBlock = new MatrixBlock(N, K*P*Q, true, 0);
 			}
 			else {
 				outputBlock = getDenseOutputBlock(ec, N, K*P*Q, false);
 				params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
 				LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params);
 			}
 			ec.releaseMatrixInput(_in2.getName());
 		}
 		else if (instOpcode.equalsIgnoreCase("conv2d_backward_filter")) {
 			MatrixBlock dout = ec.getMatrixInput(_in2.getName());
 			if(dout.isEmptyBlock() || matBlock.isEmptyBlock()) {
 				outputBlock = new MatrixBlock(K, C*R*S, true, 0);
 			}
 			else {
 				outputBlock = getDenseOutputBlock(ec, K, C*R*S, false);
 				params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
 				LibMatrixDNN.conv2d_backward_filter(matBlock, dout, outputBlock, params);
 			}
 			ec.releaseMatrixInput(_in2.getName());
 		}
 		else if (instOpcode.equalsIgnoreCase("conv2d_backward_data")) {
 			MatrixBlock dout = ec.getMatrixInput(_in2.getName());
 			if(dout.isEmptyBlock() || matBlock.isEmptyBlock()) {
 				outputBlock = new MatrixBlock(N, C * H * W, true, 0);
 			}
 			else {
 				outputBlock = getDenseOutputBlock(ec, N, C * H * W, false);
 				params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
 				LibMatrixDNN.conv2d_backward_data(matBlock, dout, outputBlock, params);
 			}
 			ec.releaseMatrixInput(_in2.getName());
 		}
 		else {
 			throw new DMLRuntimeException("Unsupported op code " + instOpcode);
 		}

 		// release inputs/outputs
 		ec.releaseMatrixInput(input1.getName());
 		ec.setMatrixOutput(getOutputVariableName(), outputBlock);
 	}

 	@SuppressWarnings("unused")
 	private MatrixBlock getDenseOutputBlock(ExecutionContext ec, int numRows, int numCols, boolean reuseNonZeroedOutput1) throws DMLRuntimeException {
 		long start = -1;
 		if(DMLScript.STATISTICS)
 			start = System.nanoTime();

 		MatrixBlock outputBlock = new MatrixBlock(numRows, numCols, false, numRows * numCols);
 		_reuseNonZeroedOutput = false;
 		if(reuseNonZeroedOutput1 && DMLScript.REUSE_NONZEROED_OUTPUT) {
 			_reuseNonZeroedOutput = true;
 			outputBlock.allocateDenseBlock(true, !_reuseNonZeroedOutput);
 		}
 		else  {
 			outputBlock.allocateDenseBlock();
 		}
 		outputBlock.setNonZeros(-1);

 		if(DMLScript.STATISTICS)
 			Statistics.incrementAllocationTime(System.nanoTime()-start, false);
 		return outputBlock;
 	}
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.sysml.runtime.instructions.cp;

	import java.util.ArrayList;

	import org.apache.sysml.api.DMLScript;
	import org.apache.sysml.parser.Expression.DataType;
	import org.apache.sysml.parser.Expression.ValueType;
	import org.apache.sysml.runtime.DMLRuntimeException;
	import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
	import org.apache.sysml.runtime.functionobjects.SwapIndex;
	import org.apache.sysml.runtime.instructions.InstructionUtils;
	import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
	import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
	import org.apache.sysml.runtime.matrix.data.MatrixBlock;
	import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
	import org.apache.sysml.runtime.util.ConvolutionUtils;
	import org.apache.sysml.utils.Statistics;

	public class ConvolutionCPInstruction extends UnaryCPInstruction {

	private CPOperand _in2; // used for pooling backward
	private ArrayList<CPOperand> _input_shape;
	private ArrayList<CPOperand> _filter_shape;
	private ArrayList<CPOperand> _stride = new ArrayList<CPOperand>();
	private ArrayList<CPOperand> _padding = new ArrayList<CPOperand>();
	private boolean _reuseNonZeroedOutput = false;
	private int _numThreads = -1;
	public ConvolutionCPInstruction(CPOperand in, CPOperand out, String opcode,
	String istr, ArrayList<CPOperand> stride,
	ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape,
	ArrayList<CPOperand> filter_shape, int numThreads) {
	super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), in, out,
	opcode, istr);
	_cptype = CPINSTRUCTION_TYPE.Convolution;
	_stride = stride;
	_padding = padding;
	_input_shape = input_shape;
	_filter_shape = filter_shape;
	_numThreads = numThreads;
	}

	public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand out, String opcode,
	String istr, ArrayList<CPOperand> stride,
	ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape,
	ArrayList<CPOperand> filter_shape, int numThreads) {
	super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), in, out,
	opcode, istr);
	_in2 = in2;
	_cptype = CPINSTRUCTION_TYPE.Convolution;
	_stride = stride;
	_padding = padding;
	_input_shape = input_shape;
	_filter_shape = filter_shape;
	_numThreads = numThreads;
	}

	public static ConvolutionCPInstruction parseInstruction(String str)
	throws DMLRuntimeException {
	CPOperand in = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
	CPOperand out = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);

	String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
	String opcode = parts[0];
	if (opcode.equalsIgnoreCase("maxpooling")) {
	InstructionUtils.checkNumFields(parts, 15);
	// stride1, stride2, padding1, padding2
	// input_shape1, input_shape2, input_shape3, input_shape4,
	// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
	in.split(parts[1]);
	out.split(parts[14]);

	ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
	ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
	ArrayList<CPOperand> input_shape = new ArrayList<CPOperand>();
	ArrayList<CPOperand> filter_shape = new ArrayList<CPOperand>();
	stride.add(new CPOperand(parts[2]));
	stride.add(new CPOperand(parts[3]));
	padding.add(new CPOperand(parts[4]));
	padding.add(new CPOperand(parts[5]));
	input_shape.add(new CPOperand(parts[6]));
	input_shape.add(new CPOperand(parts[7]));
	input_shape.add(new CPOperand(parts[8]));
	input_shape.add(new CPOperand(parts[9]));
	filter_shape.add(new CPOperand(parts[10]));
	filter_shape.add(new CPOperand(parts[11]));
	filter_shape.add(new CPOperand(parts[12]));
	filter_shape.add(new CPOperand(parts[13]));
	int k = Integer.parseInt(parts[15]);

	return new ConvolutionCPInstruction(in, out, opcode, str, stride,
	padding, input_shape, filter_shape, k);
	}
	else if (opcode.equalsIgnoreCase("maxpooling_backward")
	\|\| opcode.equalsIgnoreCase("conv2d")
	\|\| opcode.equalsIgnoreCase("conv2d_backward_filter")
	\|\| opcode.equalsIgnoreCase("conv2d_backward_data")) {
	InstructionUtils.checkNumFields(parts, 16);
	// dout, stride1, stride2, padding1, padding2
	// input_shape1, input_shape2, input_shape3, input_shape4,
	// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
	in.split(parts[1]);
	CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
	in2.split(parts[2]);
	out.split(parts[15]);

	ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
	ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
	ArrayList<CPOperand> input_shape = new ArrayList<CPOperand>();
	ArrayList<CPOperand> filter_shape = new ArrayList<CPOperand>();
	stride.add(new CPOperand(parts[3]));
	stride.add(new CPOperand(parts[4]));
	padding.add(new CPOperand(parts[5]));
	padding.add(new CPOperand(parts[6]));
	input_shape.add(new CPOperand(parts[7]));
	input_shape.add(new CPOperand(parts[8]));
	input_shape.add(new CPOperand(parts[9]));
	input_shape.add(new CPOperand(parts[10]));
	filter_shape.add(new CPOperand(parts[11]));
	filter_shape.add(new CPOperand(parts[12]));
	filter_shape.add(new CPOperand(parts[13]));
	filter_shape.add(new CPOperand(parts[14]));
	int k = Integer.parseInt(parts[16]);

	return new ConvolutionCPInstruction(in, in2, out, opcode, str, stride,
	padding, input_shape, filter_shape, k);
	}
	else {
	throw new DMLRuntimeException("Unknown opcode while parsing a ConvolutionCPInstruction: " + str);
	}
	}

	private int getScalarInput(ExecutionContext ec, ArrayList<CPOperand> aL,
	int index) throws DMLRuntimeException {
	return (int) ec.getScalarInput(aL.get(index).getName(),
	aL.get(index).getValueType(), aL.get(index).isLiteral())
	.getLongValue();
	}

	@Override
	public void processInstruction(ExecutionContext ec)
	throws DMLRuntimeException {
	// acquire inputs
	MatrixBlock outputBlock = null;
	MatrixBlock matBlock = ec.getMatrixInput(input1.getName());
	int pad_h = getScalarInput(ec, _padding, 0);
	int pad_w = getScalarInput(ec, _padding, 1);
	int stride_h = getScalarInput(ec, _stride, 0);
	int stride_w = getScalarInput(ec, _stride, 1);

	int N = getScalarInput(ec, _input_shape, 0);
	int C = getScalarInput(ec, _input_shape, 1);
	int H = getScalarInput(ec, _input_shape, 2);
	int W = getScalarInput(ec, _input_shape, 3);

	int K = getScalarInput(ec, _filter_shape, 0);

	int R = getScalarInput(ec, _filter_shape, 2);
	int S = getScalarInput(ec, _filter_shape, 3);
	int P = (int) ConvolutionUtils.getP(H, R, stride_h, pad_h);
	int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);

	ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads);
	if (instOpcode.equalsIgnoreCase("maxpooling")) {
	if(matBlock.isEmptyBlock()) {
	outputBlock = new MatrixBlock(N, CPQ, true, 0);
	}
	else {
	// Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed
	// without somewhat expensive HashMap checks
	outputBlock = getDenseOutputBlock(ec, N, CPQ, true);
	params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
	}
	LibMatrixDNN.maxpooling(matBlock, outputBlock, params);
	}
	else if (instOpcode.equalsIgnoreCase("maxpooling_backward")) {
	MatrixBlock dout = ec.getMatrixInput(_in2.getName());
	if(matBlock.isEmptyBlock() \|\| dout.isEmptyBlock()) {
	outputBlock = new MatrixBlock(N, CHW, true, 0);
	}
	else {
	// Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed
	// without somewhat expensive HashMap checks
	outputBlock = getDenseOutputBlock(ec, N, CHW, false);
	params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
	}
	LibMatrixDNN.maxpooling_backward(matBlock, dout, outputBlock, params);
	ec.releaseMatrixInput(_in2.getName());
	}
	else if (instOpcode.equalsIgnoreCase("conv2d")) {
	MatrixBlock filter = ec.getMatrixInput(_in2.getName());
	if(filter.isEmptyBlock() \|\| matBlock.isEmptyBlock()) {
	outputBlock = new MatrixBlock(N, KPQ, true, 0);
	}
	else {
	outputBlock = getDenseOutputBlock(ec, N, KPQ, false);
	params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
	LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params);
	}
	ec.releaseMatrixInput(_in2.getName());
	}
	else if (instOpcode.equalsIgnoreCase("conv2d_backward_filter")) {
	MatrixBlock dout = ec.getMatrixInput(_in2.getName());
	if(dout.isEmptyBlock() \|\| matBlock.isEmptyBlock()) {
	outputBlock = new MatrixBlock(K, CRS, true, 0);
	}
	else {
	outputBlock = getDenseOutputBlock(ec, K, CRS, false);
	params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
	LibMatrixDNN.conv2d_backward_filter(matBlock, dout, outputBlock, params);
	}
	ec.releaseMatrixInput(_in2.getName());
	}
	else if (instOpcode.equalsIgnoreCase("conv2d_backward_data")) {
	MatrixBlock dout = ec.getMatrixInput(_in2.getName());
	if(dout.isEmptyBlock() \|\| matBlock.isEmptyBlock()) {
	outputBlock = new MatrixBlock(N, C * H * W, true, 0);
	}
	else {
	outputBlock = getDenseOutputBlock(ec, N, C * H * W, false);
	params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
	LibMatrixDNN.conv2d_backward_data(matBlock, dout, outputBlock, params);
	}
	ec.releaseMatrixInput(_in2.getName());
	}
	else {
	throw new DMLRuntimeException("Unsupported op code " + instOpcode);
	}

	// release inputs/outputs
	ec.releaseMatrixInput(input1.getName());
	ec.setMatrixOutput(getOutputVariableName(), outputBlock);
	}

	@SuppressWarnings("unused")
	private MatrixBlock getDenseOutputBlock(ExecutionContext ec, int numRows, int numCols, boolean reuseNonZeroedOutput1) throws DMLRuntimeException {
	long start = -1;
	if(DMLScript.STATISTICS)
	start = System.nanoTime();

	MatrixBlock outputBlock = new MatrixBlock(numRows, numCols, false, numRows * numCols);
	_reuseNonZeroedOutput = false;
	if(reuseNonZeroedOutput1 && DMLScript.REUSE_NONZEROED_OUTPUT) {
	_reuseNonZeroedOutput = true;
	outputBlock.allocateDenseBlock(true, !_reuseNonZeroedOutput);
	}
	else {
	outputBlock.allocateDenseBlock();
	}
	outputBlock.setNonZeros(-1);

	if(DMLScript.STATISTICS)
	Statistics.incrementAllocationTime(System.nanoTime()-start, false);
	return outputBlock;
	}
	}