blob: dae2f82b9143252319e231045008ede56644a9b1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.hops;
import java.util.ArrayList;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.Hop.MultiThreadedHop;
import org.apache.sysml.lops.ConvolutionTransform;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
/**
 * Hop for deep-learning convolution and pooling operations: conv2d,
 * conv2d backward data/filter, max pooling, and max pooling backward.
 * <p>
 * Input layout: input 0 is the image (respectively dout) matrix; for the
 * two-data-input ops (conv2d and the backward variants) input 1 is the
 * second data matrix (filter or dout). The remaining inputs are scalar
 * shape parameters in this fixed order:
 * stride1, stride2, padding1, padding2,
 * input_shape1..4 (N, C, H, W), filter_shape1..4 (K, C, R, S).
 * <p>
 * Currently only CP and GPU lops are constructed; SPARK/MR are rejected
 * (see constructLops).
 */
public class ConvolutionOp extends Hop implements MultiThreadedHop
{
	/** The concrete convolution/pooling operation this hop represents. */
	private Hop.ConvOp op;

	private int _maxNumThreads = -1; //-1 for unlimited

	private ConvolutionOp() {
		//default constructor for clone
	}

	/**
	 * Creates a convolution hop with a single input and wires up the
	 * parent/child links; also eagerly refreshes the output dimensions.
	 *
	 * @param l hop name
	 * @param dt output data type
	 * @param vt output value type
	 * @param o convolution operation type
	 * @param inp single input hop
	 */
	public ConvolutionOp(String l, DataType dt, ValueType vt, ConvOp o, Hop inp)
	{
		super(l, dt, vt);
		op = o;
		getInput().add(0, inp);
		inp.getParent().add(this);

		//compute unknown dims and nnz
		refreshSizeInformation();
	}

	/**
	 * Creates a convolution hop with the full ordered input list (data
	 * input(s) followed by the stride/padding/shape scalars; see class doc).
	 *
	 * @param l hop name
	 * @param dt output data type
	 * @param vt output value type
	 * @param o convolution operation type
	 * @param inp ordered list of input hops
	 */
	public ConvolutionOp(String l, DataType dt, ValueType vt, ConvOp o, ArrayList<Hop> inp)
	{
		super(l, dt, vt);
		op = o;
		for( int i=0; i<inp.size(); i++ ) {
			Hop in = inp.get(i);
			getInput().add(i, in);
			in.getParent().add(this);
		}

		//compute unknown dims and nnz
		refreshSizeInformation();
	}

	public ConvOp getOp() {
		return op;
	}

	@Override
	public String getOpString() {
		return "" + HopsConv2Lops.get(op);
	}

	@Override
	public Lop constructLops()
		throws HopsException, LopsException
	{
		//return already created lops
		if( getLops() != null )
			return getLops();

		ExecType et = optFindExecType();
		ArrayList<Hop> inputs = getInput();
		switch( op )
		{
			case MAX_POOLING:
			case MAX_POOLING_BACKWARD:
			case DIRECT_CONV2D:
			case DIRECT_CONV2D_BACKWARD_DATA:
			case DIRECT_CONV2D_BACKWARD_FILTER:
			{
				//TODO: Fix me. Currently forcing the instruction to GPU if gpu flag is set
				if(DMLScript.USE_ACCELERATOR) {
					et = ExecType.GPU;
					setLops(constructConvolutionLops(et, inputs));
					break;
				}
				else if(et == ExecType.CP) {
					setLops(constructConvolutionLops(et, inputs));
					break;
				}
				else {
					// TODO: Add support for SPARK/MR backends once we are happy with the performance of
					// single node Lenet script.
					throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name());
				}
			}
			default:
				throw new HopsException("Unsupported lops construction for operation type '"+op+"'.");
		}

		//add reblock/checkpoint lops if necessary
		constructAndSetLopsDataFlowProperties();

		return getLops();
	}

	public void setOp(ConvOp op) {
		this.op = op;
	}

	/**
	 * Expected number of inputs for the given operation: 14 for the ops
	 * that carry two data inputs (conv2d, its backward variants, and
	 * max pooling backward), 13 otherwise (1 data input + 12 shape scalars).
	 * Centralized so lop construction and parseInput stay consistent.
	 */
	private static int getExpectedNumInputs(ConvOp op) {
		return (op == ConvOp.MAX_POOLING_BACKWARD
				|| op == ConvOp.DIRECT_CONV2D
				|| op == ConvOp.DIRECT_CONV2D_BACKWARD_FILTER
				|| op == ConvOp.DIRECT_CONV2D_BACKWARD_DATA) ? 14 : 13;
	}

	/**
	 * Constructs a ConvolutionTransform lop for a fused operation, taking
	 * dimensions and metadata from the given primary hop instead of 'this'
	 * (static because the fused lop does not correspond to a single hop).
	 *
	 * @param et execution type of the lop
	 * @param inputs ordered inputs (data input(s) + shape scalars)
	 * @param op convolution operation type
	 * @param primaryOp hop providing data/value type, block sizes and line numbers
	 * @param rlen number of rows of the output
	 * @param clen number of columns of the output
	 * @return the constructed lop
	 * @throws HopsException if the number of inputs does not match the op
	 * @throws LopsException if lop construction fails
	 */
	public static Lop constructFusedConvolutionLops(ExecType et,
			ArrayList<Hop> inputs,
			ConvOp op, ConvolutionOp primaryOp,
			long rlen, long clen) throws HopsException, LopsException {
		if(inputs.size() != getExpectedNumInputs(op)) {
			throw new HopsException("Incorrect number of inputs for " + op.name());
		}

		Lop in = inputs.get(0).constructLops();
		//multi-threading only applies to CP; a single thread otherwise
		int numThreads = et == ExecType.CP ? OptimizerUtils.getConstrainedNumThreads(primaryOp.getMaxNumThreads()) : 1;
		ConvolutionTransform transform1 = new ConvolutionTransform( in,
				HopsConv2Lops.get(op), primaryOp.getDataType(), primaryOp.getValueType(), et, numThreads);

		//set output dimensions from the caller-provided rlen/clen
		transform1.getOutputParameters().setDimensions(
				rlen, clen, primaryOp.getRowsInBlock(), primaryOp.getColsInBlock(), -1, primaryOp.getUpdateType());

		//set line numbers from the primary hop
		transform1.setAllPositions(primaryOp.getBeginLine(), primaryOp.getBeginColumn(), primaryOp.getEndLine(), primaryOp.getEndColumn());
		in.addOutput(transform1);

		// stride1, stride2, padding1, padding2
		// input_shape1, input_shape2, input_shape3, input_shape4,
		// filter_shape1, filter_shape2, filter_shape3, filter_shape4
		for( int i=1; i < inputs.size(); i++ )
		{
			Lop ltmp = inputs.get(i).constructLops();
			transform1.addInput(ltmp);
			ltmp.addOutput(transform1);
		}
		transform1.setLevel(); //force order of added lops
		return transform1;
	}

	/**
	 * Constructs the ConvolutionTransform lop for this hop.
	 *
	 * @param et execution type of the lop
	 * @param inputs ordered inputs (data input(s) + shape scalars)
	 * @return the constructed lop
	 * @throws HopsException if the number of inputs does not match the op
	 * @throws LopsException if lop construction fails
	 */
	public Lop constructConvolutionLops(ExecType et, ArrayList<Hop> inputs) throws HopsException, LopsException {
		if(inputs.size() != getExpectedNumInputs(op)) {
			throw new HopsException("Incorrect number of inputs for " + op.name());
		}

		Lop in = inputs.get(0).constructLops();
		int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
		ConvolutionTransform transform1 = new ConvolutionTransform( in,
				HopsConv2Lops.get(op), getDataType(), getValueType(), et, k);
		setOutputDimensions(transform1);
		setLineNumbers(transform1);
		in.addOutput(transform1);

		// stride1, stride2, padding1, padding2
		// input_shape1, input_shape2, input_shape3, input_shape4,
		// filter_shape1, filter_shape2, filter_shape3, filter_shape4
		for( int i=1; i < inputs.size(); i++ )
		{
			Lop ltmp = inputs.get(i).constructLops();
			transform1.addInput(ltmp);
			ltmp.addOutput(transform1);
		}
		transform1.setLevel(); //force order of added lops
		return transform1;
	}

	@Override
	protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
	{
		//conservatively assume a fully dense output
		double sparsity = 1.0;
		return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity);
	}

	@Override
	protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz )
	{
		//default: no intermediate memory requirements
		return 0;
	}

	@Override
	protected long[] inferOutputCharacteristics( MemoTable memo )
	{
		// [numRows, numCols, NNZ]
		long[] ret = null;

		ConvolutionParameters params;
		try {
			params = parseInput();
		} catch (DMLRuntimeException e) {
			throw new RuntimeException(e);
		}

		switch(op)
		{
			case MAX_POOLING:
			{
				// pooling output: N x (C*P*Q)
				ret = new long[3];
				ret[0] = getInput().get(0)._dim1;
				ret[1] = getExtractedVal(params.C, params.P, params.Q);
				ret[2] = -1;
				break;
			}
			case MAX_POOLING_BACKWARD:
			{
				// gradient has the shape of the pooling input
				ret = new long[3];
				ret[0] = getInput().get(0)._dim1;
				ret[1] = getInput().get(0)._dim2;
				ret[2] = -1;
				break;
			}
			case DIRECT_CONV2D:
			{
				// conv output: N x (K*P*Q), with K taken from the filter input
				ret = new long[3];
				ret[0] = getInput().get(0)._dim1;
				ret[1] = getExtractedVal(getInput().get(1)._dim1, params.P, params.Q);
				ret[2] = -1;
				break;
			}
			case DIRECT_CONV2D_BACKWARD_FILTER:
			{
				// filter gradient has the shape of the filter (input 1)
				ret = new long[3];
				ret[0] = getInput().get(1)._dim1;
				ret[1] = getInput().get(1)._dim2;
				ret[2] = -1;
				break;
			}
			case DIRECT_CONV2D_BACKWARD_DATA:
			{
				// data gradient has the shape of the image (input 0)
				ret = new long[3];
				ret[0] = getInput().get(0)._dim1;
				ret[1] = getInput().get(0)._dim2;
				ret[2] = -1;
				break;
			}
			default:
				throw new RuntimeException("Unsupported op:" + op.name());
		}

		if(LOG.isDebugEnabled() && (ret[0] <= 0 || ret[1] <= 0)) {
			// note: filter_dim is R x S (filter height/width), not H x W
			LOG.debug("Unknown dimensions for ConvolutionOp in inferOutputCharacteristics:" + op.name() + " " + ret[0] + " " + ret[1] +
					" img_dim=[" + params.N + " " + params.C + " " + params.H + " " + params.W + "]" +
					" filter_dim=[" + params.K + " " + params.C + " " + params.R + " " + params.S + "]" +
					" output_feature_map=[" + params.P + " " + params.Q + "] stride=[" + params.stride_h + " " + params.stride_w + "]" +
					" pad=[" + params.pad_h + " " + params.pad_w + "]");
		}

		return ret;
	}

	@Override
	public boolean allowsAllExecTypes()
	{
		return true;
	}

	@Override
	protected ExecType optFindExecType() throws HopsException {
		checkAndSetForcedPlatform();

		//TODO: Remove this once memEstimate is fixed for these instructions
		if((op == ConvOp.MAX_POOLING || op == ConvOp.MAX_POOLING_BACKWARD) && DMLScript.USE_ACCELERATOR) {
			return ExecType.GPU;
		}

		ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;

		if( _etypeForced != null )
		{
			_etype = _etypeForced;
		}
		else
		{
			// TODO: After adding Spark backend, uncomment this
			if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
				_etype = findExecTypeByMemEstimate();
			}
			else
			{
				_etype = REMOTE;
			}

			//check for valid CP dimensions and matrix size
			checkAndSetInvalidCPDimsAndSize();
		}

		//mark for recompile (forever)
		if( ConfigurationManager.isDynamicRecompilation() && !dimsKnown(true) && _etype==REMOTE )
			setRequiresRecompile();

		//NOTE: intentionally overrides the decision above — only CP lops are
		//implemented so far (the GPU case returned early); see constructLops
		_etype = ExecType.CP;

		return _etype;
	}

	// stride1, stride2, padding1, padding2
	// input_shape1, input_shape2, input_shape3, input_shape4,
	// filter_shape1, filter_shape2, filter_shape3, filter_shape4
	/**
	 * Extracts the scalar shape inputs into a ConvolutionParameters object
	 * (N, C, H, W, K, R, S, strides, pads). For 14-input ops all scalar
	 * indices are shifted by one because input 1 is a second data matrix;
	 * filter_shape2 (=C) is skipped as it duplicates input_shape2.
	 *
	 * @return the parsed convolution parameters
	 * @throws DMLRuntimeException if a size expression cannot be evaluated
	 */
	ConvolutionParameters parseInput() throws DMLRuntimeException {
		ConvolutionParameters params = null;
		if( getExpectedNumInputs(op) == 14 ) {
			params = new ConvolutionParameters(
					computeSizeInformation(getInput().get(6)),  // N
					computeSizeInformation(getInput().get(7)),  // C
					computeSizeInformation(getInput().get(8)),  // H
					computeSizeInformation(getInput().get(9)),  // W
					computeSizeInformation(getInput().get(10)), // K
					computeSizeInformation(getInput().get(12)), // R
					computeSizeInformation(getInput().get(13)), // S
					computeSizeInformation(getInput().get(2)),  // stride_h
					computeSizeInformation(getInput().get(3)),  // stride_w
					computeSizeInformation(getInput().get(4)),  // pad_h
					computeSizeInformation(getInput().get(5)), _maxNumThreads);
		}
		else {
			params = new ConvolutionParameters(
					computeSizeInformation(getInput().get(5)),  // N
					computeSizeInformation(getInput().get(6)),  // C
					computeSizeInformation(getInput().get(7)),  // H
					computeSizeInformation(getInput().get(8)),  // W
					computeSizeInformation(getInput().get(9)),  // K
					computeSizeInformation(getInput().get(11)), // R
					computeSizeInformation(getInput().get(12)), // S
					computeSizeInformation(getInput().get(1)),  // stride_h
					computeSizeInformation(getInput().get(2)),  // stride_w
					computeSizeInformation(getInput().get(3)),  // pad_h
					computeSizeInformation(getInput().get(4)), _maxNumThreads);
		}
		return params;
	}

	/** Product of two sizes, propagating -1 (unknown) if either is unknown. */
	long getExtractedVal(long val1, long val2) {
		if(val1 == -1 || val2 == -1) {
			return -1;
		}
		return val1*val2;
	}

	/** Product of three sizes, propagating -1 (unknown) if any is unknown. */
	public static long getExtractedVal(long val1, long val2, long val3) {
		if(val1 == -1 || val2 == -1 || val3 == -1) {
			return -1;
		}
		return val1*val2*val3;
	}

	@Override
	public void refreshSizeInformation()
	{
		ConvolutionParameters params;
		try {
			params = parseInput();
		} catch (DMLRuntimeException e) {
			throw new RuntimeException(e);
		}

		switch(op)
		{
			case MAX_POOLING:
			{
				// pooling output: N x (C*P*Q)
				_dim1 = getInput().get(0)._dim1;
				_dim2 = getExtractedVal(params.C, params.P, params.Q);
				_nnz = -1; // cannot infer stats
				break;
			}
			case MAX_POOLING_BACKWARD:
			{
				// gradient has the shape of the pooling input
				_dim1 = getInput().get(0)._dim1;
				_dim2 = getInput().get(0)._dim2;
				_nnz = -1;
				break;
			}
			case DIRECT_CONV2D:
			{
				// conv output: N x (K*P*Q), with K taken from the filter input
				_dim1 = getInput().get(0)._dim1;
				_dim2 = getExtractedVal(getInput().get(1)._dim1, params.P, params.Q);
				_nnz = -1; // cannot infer stats
				break;
			}
			case DIRECT_CONV2D_BACKWARD_DATA:
			{
				// data gradient has the shape of the image (input 0)
				_dim1 = getInput().get(0)._dim1;
				_dim2 = getInput().get(0)._dim2;
				_nnz = -1; // cannot infer stats
				break;
			}
			case DIRECT_CONV2D_BACKWARD_FILTER:
			{
				// filter gradient has the shape of the filter (input 1)
				_dim1 = getInput().get(1)._dim1;
				_dim2 = getInput().get(1)._dim2;
				_nnz = -1; // cannot infer stats
				break;
			}
			default:
				throw new RuntimeException("The sizes are not refreshed for " + op.name());
		}

		if(LOG.isDebugEnabled() && (_dim1 <= 0 || _dim2 <= 0)) {
			// note: filter_dim is R x S (filter height/width), not H x W
			LOG.debug("Unknown dimensions for ConvolutionOp in refreshSizeInformation:" + op.name() + " " + _dim1 + " " + _dim2 +
					" img_dim=[" + params.N + " " + params.C + " " + params.H + " " + params.W + "]" +
					" filter_dim=[" + params.K + " " + params.C + " " + params.R + " " + params.S + "]" +
					" output_feature_map=[" + params.P + " " + params.Q + "] stride=[" + params.stride_h + " " + params.stride_w + "]" +
					" pad=[" + params.pad_h + " " + params.pad_w + "]");
		}
	}

	@Override
	public Object clone() throws CloneNotSupportedException
	{
		ConvolutionOp ret = new ConvolutionOp();

		//copy generic attributes
		ret.clone(this, false);

		//copy specific attributes
		ret.op = op;
		ret._maxNumThreads = _maxNumThreads;

		return ret;
	}

	@Override
	public boolean compare( Hop that )
	{
		if( !(that instanceof ConvolutionOp) )
			return false;

		ConvolutionOp that2 = (ConvolutionOp)that;

		boolean ret = (op == that2.op)
				&& (getInput().size()==that.getInput().size())
				&& _maxNumThreads == that2._maxNumThreads;

		//compare all childs
		if( ret ) //sizes matched
			for( int i=0; i<_input.size(); i++ )
				ret &= getInput().get(i) == that2.getInput().get(i);

		return ret;
	}

	@Override
	public void printMe() throws HopsException
	{
		if (LOG.isDebugEnabled()){
			if (getVisited() != VisitStatus.DONE) {
				super.printMe();
				LOG.debug("  Operation: " + op);
				for (Hop h : getInput()) {
					h.printMe();
				}
			}
			setVisited(VisitStatus.DONE);
		}
	}

	@Override
	public void setMaxNumThreads( int k ) {
		_maxNumThreads = k;
	}

	@Override
	public int getMaxNumThreads() {
		return _maxNumThreads;
	}
}