blob: fc1fd2d6d3eccfbb793d5a3be30dccf2b275a10e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.hops;
import java.util.HashMap;
import java.util.Map.Entry;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.rewrite.HopRewriteUtils;
import org.apache.sysml.hops.Hop.MultiThreadedHop;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.DataGen;
import org.apache.sysml.lops.LopsException;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.parser.DataIdentifier;
import org.apache.sysml.parser.DataExpression;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.parser.Statement;
import org.apache.sysml.runtime.controlprogram.parfor.ProgramConverter;
/**
*
*
*/
public class DataGenOp extends Hop implements MultiThreadedHop
{
public static final long UNSPECIFIED_SEED = -1;
// defines the specific data generation method
private DataGenMethod _op;
private int _maxNumThreads = -1; //-1 for unlimited
/**
* List of "named" input parameters. They are maintained as a hashmap:
* parameter names (String) are mapped as indices (Integer) into getInput()
* arraylist.
*
* i.e., getInput().get(_paramIndexMap.get(parameterName)) refers to the Hop
* that is associated with parameterName.
*/
private HashMap<String, Integer> _paramIndexMap = new HashMap<String, Integer>();
/** target identifier which will hold the random object */
private DataIdentifier _id;
//Rand-specific attributes
/** sparsity of the random object, this is used for mem estimate */
private double _sparsity = -1;
/** base directory for temp file (e.g., input seeds)*/
private String _baseDir;
//seq-specific attributes (used for recompile/recompile)
private double _incr = Double.MAX_VALUE;
private DataGenOp() {
//default constructor for clone
}
/**
* <p>Creates a new Rand HOP.</p>
*
* @param id the target identifier
* @param inputParameters HashMap of the input parameters for Rand Hop
*/
public DataGenOp(DataGenMethod mthd, DataIdentifier id, HashMap<String, Hop> inputParameters)
{
super(id.getName(), DataType.MATRIX, ValueType.DOUBLE);
_id = id;
_op = mthd;
int index = 0;
for( Entry<String, Hop> e: inputParameters.entrySet() ) {
String s = e.getKey();
Hop input = e.getValue();
getInput().add(input);
input.getParent().add(this);
_paramIndexMap.put(s, index);
index++;
}
if ( mthd == DataGenMethod.RAND )
_sparsity = Double.valueOf(((LiteralOp)inputParameters.get(DataExpression.RAND_SPARSITY)).getName());
//generate base dir
String scratch = ConfigurationManager.getScratchSpace();
_baseDir = scratch + Lop.FILE_SEPARATOR + Lop.PROCESS_PREFIX + DMLScript.getUUID() + Lop.FILE_SEPARATOR +
Lop.FILE_SEPARATOR + ProgramConverter.CP_ROOT_THREAD_ID + Lop.FILE_SEPARATOR;
//compute unknown dims and nnz
refreshSizeInformation();
}
@Override
public String getOpString() {
return "dg(" + _op.toString().toLowerCase() +")";
}
public DataGenMethod getOp() {
return _op;
}
@Override
public void setMaxNumThreads( int k ) {
_maxNumThreads = k;
}
@Override
public int getMaxNumThreads() {
return _maxNumThreads;
}
@Override
public Lop constructLops()
throws HopsException, LopsException
{
//return already created lops
if( getLops() != null )
return getLops();
ExecType et = optFindExecType();
HashMap<String, Lop> inputLops = new HashMap<String, Lop>();
for (Entry<String, Integer> cur : _paramIndexMap.entrySet()) {
if( cur.getKey().equals(DataExpression.RAND_ROWS) && _dim1>0 )
inputLops.put(cur.getKey(), new LiteralOp(_dim1).constructLops());
else if( cur.getKey().equals(DataExpression.RAND_COLS) && _dim2>0 )
inputLops.put(cur.getKey(), new LiteralOp(_dim2).constructLops());
else
inputLops.put(cur.getKey(), getInput().get(cur.getValue()).constructLops());
}
DataGen rnd = new DataGen(_op, _id, inputLops,_baseDir,
getDataType(), getValueType(), et);
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
rnd.setNumThreads(k);
rnd.getOutputParameters().setDimensions(
getDim1(), getDim2(),
//robust handling for blocksize (important for -exec singlenode; otherwise incorrect results)
(getRowsInBlock()>0)?getRowsInBlock():ConfigurationManager.getBlocksize(),
(getColsInBlock()>0)?getColsInBlock():ConfigurationManager.getBlocksize(),
//actual rand nnz might differ (in cp/mr they are corrected after execution)
(_op==DataGenMethod.RAND && et==ExecType.SPARK && getNnz()!=0) ? -1 : getNnz(),
getUpdateType());
setLineNumbers(rnd);
setLops(rnd);
//add reblock/checkpoint lops if necessary
constructAndSetLopsDataFlowProperties();
return getLops();
}
@Override
public void printMe() throws HopsException
{
if (LOG.isDebugEnabled()){
if(getVisited() != VisitStatus.DONE)
{
super.printMe();
}
setVisited(VisitStatus.DONE);
}
}
@Override
public boolean allowsAllExecTypes()
{
return true;
}
@Override
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
{
double ret = 0;
if ( _op == DataGenMethod.RAND ) {
if( hasConstantValue(0.0) ) { //if empty block
ret = OptimizerUtils.estimateSizeEmptyBlock(dim1, dim2);
}
else {
//sparsity-aware estimation (dependent on sparse generation approach); for pure dense generation
//we would need to disable sparsity-awareness and estimate via sparsity=1.0
ret = OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, _sparsity);
}
}
else {
ret = OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 1.0);
}
return ret;
}
@Override
protected double computeIntermediateMemEstimate( long dim1, long dim2, long nnz )
{
if ( _op == DataGenMethod.RAND && dimsKnown() ) {
long numBlocks = (long) (Math.ceil((double)dim1/ConfigurationManager.getBlocksize())
* Math.ceil((double)dim2/ConfigurationManager.getBlocksize()));
return 32 + numBlocks*8.0; // 32 bytes of overhead for an array of long & numBlocks long values.
}
else
return 0;
}
@Override
protected long[] inferOutputCharacteristics( MemoTable memo )
{
//infer rows and
if( (_op == DataGenMethod.RAND || _op == DataGenMethod.SINIT ) &&
OptimizerUtils.ALLOW_WORSTCASE_SIZE_EXPRESSION_EVALUATION )
{
long dim1 = computeDimParameterInformation(getInput().get(_paramIndexMap.get(DataExpression.RAND_ROWS)), memo);
long dim2 = computeDimParameterInformation(getInput().get(_paramIndexMap.get(DataExpression.RAND_COLS)), memo);
long nnz = (long)(_sparsity * dim1 * dim2);
if( dim1>0 && dim2>0 )
return new long[]{ dim1, dim2, nnz };
}
else if ( _op == DataGenMethod.SEQ )
{
Hop from = getInput().get(_paramIndexMap.get(Statement.SEQ_FROM));
Hop to = getInput().get(_paramIndexMap.get(Statement.SEQ_TO));
Hop incr = getInput().get(_paramIndexMap.get(Statement.SEQ_INCR));
//in order to ensure correctness we also need to know from and incr
//here, we check for the common case of seq(1,x), i.e. from=1, incr=1
if( from instanceof LiteralOp && HopRewriteUtils.getDoubleValueSafe((LiteralOp)from)==1
&& incr instanceof LiteralOp && HopRewriteUtils.getDoubleValueSafe((LiteralOp)incr)==1 )
{
long toVal = computeDimParameterInformation(to, memo);
if( toVal > 0 )
return new long[]{ toVal, 1, -1 };
}
//here, we check for the common case of seq(x,1,-1), i.e. from=x, to=1 incr=-1
if( to instanceof LiteralOp && HopRewriteUtils.getDoubleValueSafe((LiteralOp)to)==1
&& incr instanceof LiteralOp && HopRewriteUtils.getDoubleValueSafe((LiteralOp)incr)==-1 )
{
long fromVal = computeDimParameterInformation(from, memo);
if( fromVal > 0 )
return new long[]{ fromVal, 1, -1 };
}
}
return null;
}
@Override
protected ExecType optFindExecType() throws HopsException {
checkAndSetForcedPlatform();
ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;
if( _etypeForced != null )
_etype = _etypeForced;
else
{
if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
_etype = findExecTypeByMemEstimate();
}
else if (this.areDimsBelowThreshold() || this.isVector())
_etype = ExecType.CP;
else
_etype = REMOTE;
//check for valid CP dimensions and matrix size
checkAndSetInvalidCPDimsAndSize();
}
//mark for recompile (forever)
if( ConfigurationManager.isDynamicRecompilation() && !dimsKnown(true) && _etype==REMOTE )
setRequiresRecompile();
//always force string initialization into CP (not supported in MR)
//similarly, sample is currently not supported in MR either
if( _op == DataGenMethod.SINIT )
_etype = ExecType.CP;
//workaround until sample supported in MR
if( _op == DataGenMethod.SAMPLE && _etype == ExecType.MR )
_etype = ExecType.CP;
return _etype;
}
@Override
public void refreshSizeInformation()
{
Hop input1 = null;
Hop input2 = null;
Hop input3 = null;
if ( _op == DataGenMethod.RAND || _op == DataGenMethod.SINIT )
{
input1 = getInput().get(_paramIndexMap.get(DataExpression.RAND_ROWS)); //rows
input2 = getInput().get(_paramIndexMap.get(DataExpression.RAND_COLS)); //cols
//refresh rows information
refreshRowsParameterInformation(input1);
//refresh cols information
refreshColsParameterInformation(input2);
}
else if (_op == DataGenMethod.SEQ )
{
input1 = getInput().get(_paramIndexMap.get(Statement.SEQ_FROM));
input2 = getInput().get(_paramIndexMap.get(Statement.SEQ_TO));
input3 = getInput().get(_paramIndexMap.get(Statement.SEQ_INCR));
double from = computeBoundsInformation(input1);
boolean fromKnown = (from != Double.MAX_VALUE);
double to = computeBoundsInformation(input2);
boolean toKnown = (to != Double.MAX_VALUE);
double incr = computeBoundsInformation(input3);
boolean incrKnown = (incr != Double.MAX_VALUE);
if( fromKnown && toKnown && incr == 1) {
incr = ( from >= to ) ? -1 : 1;
}
if ( fromKnown && toKnown && incrKnown ) {
setDim1(1 + (long)Math.floor(((double)(to-from))/incr));
setDim2(1);
_incr = incr;
}
}
//refresh nnz information (for seq, sparsity is always -1)
if( _op == DataGenMethod.RAND && hasConstantValue(0.0) )
_nnz = 0;
else if ( dimsKnown() && _sparsity>=0 ) //general case
_nnz = (long) (_sparsity * _dim1 * _dim2);
}
public HashMap<String, Integer> getParamIndexMap()
{
return _paramIndexMap;
}
public int getParamIndex(String key)
{
return _paramIndexMap.get(key);
}
/**
*
* @return
*/
public boolean hasConstantValue()
{
//robustness for other operations, not specifying min/max/sparsity
if( _op != DataGenMethod.RAND )
return false;
Hop min = getInput().get(_paramIndexMap.get(DataExpression.RAND_MIN)); //min
Hop max = getInput().get(_paramIndexMap.get(DataExpression.RAND_MAX)); //max
Hop sparsity = getInput().get(_paramIndexMap.get(DataExpression.RAND_SPARSITY)); //sparsity
//literal value comparison
if( min instanceof LiteralOp && max instanceof LiteralOp && sparsity instanceof LiteralOp) {
try {
double minVal = HopRewriteUtils.getDoubleValue((LiteralOp)min);
double maxVal = HopRewriteUtils.getDoubleValue((LiteralOp)max);
double sp = HopRewriteUtils.getDoubleValue((LiteralOp)sparsity);
return (sp==1.0 && minVal == maxVal);
}
catch(Exception ex) {
return false;
}
}
//reference comparison (based on common subexpression elimination)
else if ( min == max && sparsity instanceof LiteralOp ) {
return (HopRewriteUtils.getDoubleValueSafe((LiteralOp)sparsity)==1);
}
return false;
}
/**
*
* @param val
* @return
*/
public boolean hasConstantValue(double val)
{
//string initialization does not exhibit constant values
if( _op != DataGenMethod.RAND )
return false;
boolean ret = false;
Hop min = getInput().get(_paramIndexMap.get(DataExpression.RAND_MIN)); //min
Hop max = getInput().get(_paramIndexMap.get(DataExpression.RAND_MAX)); //max
//literal value comparison
if( min instanceof LiteralOp && max instanceof LiteralOp){
double minVal = HopRewriteUtils.getDoubleValueSafe((LiteralOp)min);
double maxVal = HopRewriteUtils.getDoubleValueSafe((LiteralOp)max);
ret = (minVal == val && maxVal == val);
}
//sparsity awareness if requires
if( ret && val != 0 ) {
Hop sparsity = getInput().get(_paramIndexMap.get(DataExpression.RAND_SPARSITY)); //sparsity
ret &= (sparsity == null || sparsity instanceof LiteralOp
&& HopRewriteUtils.getDoubleValueSafe((LiteralOp)sparsity) == 1);
}
return ret;
}
public void setIncrementValue(double incr)
{
_incr = incr;
}
public double getIncrementValue()
{
return _incr;
}
public static long generateRandomSeed()
{
return System.nanoTime();
}
@Override
@SuppressWarnings("unchecked")
public Object clone() throws CloneNotSupportedException
{
DataGenOp ret = new DataGenOp();
//copy generic attributes
ret.clone(this, false);
//copy specific attributes
ret._op = _op;
ret._id = _id;
ret._sparsity = _sparsity;
ret._baseDir = _baseDir;
ret._paramIndexMap = (HashMap<String, Integer>) _paramIndexMap.clone();
ret._maxNumThreads = _maxNumThreads;
//note: no deep cp of params since read-only
return ret;
}
@Override
public boolean compare( Hop that )
{
if( !(that instanceof DataGenOp) )
return false;
DataGenOp that2 = (DataGenOp)that;
boolean ret = ( _op == that2._op
&& _sparsity == that2._sparsity
&& _baseDir.equals(that2._baseDir)
&& _paramIndexMap!=null && that2._paramIndexMap!=null
&& _maxNumThreads == that2._maxNumThreads );
if( ret )
{
for( Entry<String,Integer> e : _paramIndexMap.entrySet() )
{
String key1 = e.getKey();
int pos1 = e.getValue();
int pos2 = that2._paramIndexMap.get(key1);
ret &= ( that2.getInput().get(pos2)!=null
&& getInput().get(pos1) == that2.getInput().get(pos2) );
}
//special case for rand seed (no CSE if unspecified seed because runtime generated)
//note: if min and max is constant, we can safely merge those hops
if( _op == DataGenMethod.RAND || _op == DataGenMethod.SINIT ){
Hop seed = getInput().get(_paramIndexMap.get(DataExpression.RAND_SEED));
Hop min = getInput().get(_paramIndexMap.get(DataExpression.RAND_MIN));
Hop max = getInput().get(_paramIndexMap.get(DataExpression.RAND_MAX));
if( seed.getName().equals(String.valueOf(DataGenOp.UNSPECIFIED_SEED)) && min != max )
ret = false;
}
}
return ret;
}
}