blob: 0a3209cfaec94b16f58339f24c390b9fa532abe1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.instructions.spark;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject.UpdateType;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysds.runtime.functionobjects.OffsetColumnIndex;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.runtime.matrix.operators.ReorgOperator;
import org.apache.sysds.runtime.meta.DataCharacteristics;
import org.apache.sysds.runtime.util.UtilFunctions;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.Iterator;
public class AppendGSPInstruction extends BinarySPInstruction {
private boolean _cbind = true;
private AppendGSPInstruction(Operator op, CPOperand in1, CPOperand in2, CPOperand offset, CPOperand offset2,
CPOperand out, boolean cbind, String opcode, String istr) {
super(SPType.GAppend, op, in1, in2, out, opcode, istr);
_cbind = cbind;
}
public static AppendGSPInstruction parseInstruction ( String str ) {
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
InstructionUtils.checkNumFields(parts, 6);
String opcode = parts[0];
CPOperand in1 = new CPOperand(parts[1]);
CPOperand in2 = new CPOperand(parts[2]);
CPOperand in3 = new CPOperand(parts[3]);
CPOperand in4 = new CPOperand(parts[4]);
CPOperand out = new CPOperand(parts[5]);
boolean cbind = Boolean.parseBoolean(parts[6]);
if(!opcode.equalsIgnoreCase("gappend"))
throw new DMLRuntimeException("Unknown opcode while parsing a AppendGSPInstruction: " + str);
return new AppendGSPInstruction(
new ReorgOperator(OffsetColumnIndex.getOffsetColumnIndexFnObject(-1)),
in1, in2, in3, in4, out, cbind, opcode, str);
}
@Override
public void processInstruction(ExecutionContext ec) {
// general case append (map-extend, aggregate)
SparkExecutionContext sec = (SparkExecutionContext)ec;
checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);
DataCharacteristics mc1 = sec.getDataCharacteristics(input1.getName());
DataCharacteristics mc2 = sec.getDataCharacteristics(input2.getName());
JavaPairRDD<MatrixIndexes,MatrixBlock> in1 = sec.getBinaryMatrixBlockRDDHandleForVariable( input1.getName() );
JavaPairRDD<MatrixIndexes,MatrixBlock> in2 = sec.getBinaryMatrixBlockRDDHandleForVariable( input2.getName() );
JavaPairRDD<MatrixIndexes,MatrixBlock> out = null;
// General case: This one needs shifting and merging and hence has huge performance hit.
JavaPairRDD<MatrixIndexes,MatrixBlock> shifted_in2 = in2
.flatMapToPair(new ShiftMatrix(mc1, mc2, _cbind));
out = in1.cogroup(shifted_in2)
.mapToPair(new MergeWithShiftedBlocks(mc1, mc2, _cbind));
//put output RDD handle into symbol table
updateBinaryAppendOutputDataCharacteristics(sec, _cbind);
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
sec.addLineageRDD(output.getName(), input2.getName());
}
public static class MergeWithShiftedBlocks implements PairFunction<Tuple2<MatrixIndexes,Tuple2<Iterable<MatrixBlock>,Iterable<MatrixBlock>>>, MatrixIndexes, MatrixBlock>
{
private static final long serialVersionUID = 848955582909209400L;
private boolean _cbind;
private long _lastIxLeft;
private int _blen;
public MergeWithShiftedBlocks(DataCharacteristics mc1, DataCharacteristics mc2, boolean cbind)
{
_cbind = cbind;
_blen = cbind ? mc1.getBlocksize() : mc1.getBlocksize();
_lastIxLeft = (long) Math.ceil((double)(cbind ? mc1.getCols():mc1.getRows()) / _blen);
}
@Override
public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<MatrixIndexes, Tuple2<Iterable<MatrixBlock>, Iterable<MatrixBlock>>> kv)
throws Exception
{
Iterator<MatrixBlock> iterLeft = kv._2._1.iterator();
Iterator<MatrixBlock> iterRight = kv._2._2.iterator();
//handle single left/right block input
if( !iterLeft.hasNext() ) {
MatrixBlock tmp = iterRight.next();
if( iterRight.hasNext() )
tmp.merge(iterRight.next(), false);
return new Tuple2<>(kv._1, tmp);
}
else if ( !iterRight.hasNext() ) {
return new Tuple2<>(kv._1, iterLeft.next());
}
MatrixBlock firstBlk = iterLeft.next();
MatrixBlock secondBlk = iterRight.next();
long ix = _cbind ? kv._1.getColumnIndex() : kv._1.getRowIndex();
// Since merge requires the dimensions matching
if( ix == _lastIxLeft && (_cbind && firstBlk.getNumColumns() < secondBlk.getNumColumns()
|| !_cbind && firstBlk.getNumRows()<secondBlk.getNumRows()) )
{
// This case occurs for last block of LHS matrix
MatrixBlock tmp = new MatrixBlock(secondBlk.getNumRows(), secondBlk.getNumColumns(), true);
firstBlk = tmp.leftIndexingOperations(firstBlk, 0, firstBlk.getNumRows()-1, 0, firstBlk.getNumColumns()-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
}
//merge with sort since blocks might be in any order
firstBlk.merge(secondBlk, false);
return new Tuple2<>(kv._1, firstBlk);
}
}
public static class ShiftMatrix implements PairFlatMapFunction<Tuple2<MatrixIndexes,MatrixBlock>, MatrixIndexes,MatrixBlock>
{
private static final long serialVersionUID = 3524189212798209172L;
private boolean _cbind;
private long _startIx;
private int _shiftBy;
private int _blen;
private long _outlen;
public ShiftMatrix(DataCharacteristics mc1, DataCharacteristics mc2, boolean cbind) {
_cbind = cbind;
_startIx = cbind ? UtilFunctions.computeBlockIndex(mc1.getCols(), mc1.getBlocksize()) :
UtilFunctions.computeBlockIndex(mc1.getRows(), mc1.getBlocksize());
_blen = mc1.getBlocksize();
_shiftBy = (int) (cbind ? mc1.getCols()%_blen : mc1.getRows()%_blen);
_outlen = cbind ? mc1.getCols()+mc2.getCols() : mc1.getRows()+mc2.getRows();
}
@Override
public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Tuple2<MatrixIndexes, MatrixBlock> kv)
throws Exception
{
//common preparation
ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> retVal = new ArrayList<>();
MatrixIndexes ix = kv._1();
MatrixBlock in = kv._2();
int cutAt = _blen - _shiftBy;
if( _cbind )
{
MatrixIndexes firstIndex = new MatrixIndexes(ix.getRowIndex(), ix.getColumnIndex()+_startIx-1);
MatrixIndexes secondIndex = new MatrixIndexes(ix.getRowIndex(), ix.getColumnIndex()+_startIx);
int lblen1 = UtilFunctions.computeBlockSize(_outlen, firstIndex.getColumnIndex(), _blen);
if(cutAt >= in.getNumColumns()) {
// The block is too small to be cut
MatrixBlock firstBlk = new MatrixBlock(in.getNumRows(), lblen1, true);
if( in.getNumColumns()>0 )
firstBlk = firstBlk.leftIndexingOperations(in, 0, in.getNumRows()-1,
lblen1-in.getNumColumns(), lblen1-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
retVal.add(new Tuple2<>(firstIndex, firstBlk));
}
else {
// Since merge requires the dimensions matching, shifting = slicing + left indexing
MatrixBlock firstSlicedBlk = in.slice(0, in.getNumRows()-1, 0, cutAt-1, new MatrixBlock());
MatrixBlock firstBlk = new MatrixBlock(in.getNumRows(), lblen1, true);
firstBlk = firstBlk.leftIndexingOperations(firstSlicedBlk, 0, in.getNumRows()-1, _shiftBy, _blen-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
MatrixBlock secondSlicedBlk = in.slice(0, in.getNumRows()-1, cutAt, in.getNumColumns()-1, new MatrixBlock());
int llen2 = UtilFunctions.computeBlockSize(_outlen, secondIndex.getColumnIndex(), _blen);
MatrixBlock secondBlk = new MatrixBlock(in.getNumRows(), llen2, true);
secondBlk = secondBlk.leftIndexingOperations(secondSlicedBlk, 0, in.getNumRows()-1, 0, secondSlicedBlk.getNumColumns()-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
retVal.add(new Tuple2<>(firstIndex, firstBlk));
retVal.add(new Tuple2<>(secondIndex, secondBlk));
}
}
else //rbind
{
MatrixIndexes firstIndex = new MatrixIndexes(ix.getRowIndex()+_startIx-1, ix.getColumnIndex());
MatrixIndexes secondIndex = new MatrixIndexes(ix.getRowIndex()+_startIx, ix.getColumnIndex());
int lblen1 = UtilFunctions.computeBlockSize(_outlen, firstIndex.getRowIndex(), _blen);
if(cutAt >= in.getNumRows()) {
// The block is too small to be cut
MatrixBlock firstBlk = new MatrixBlock(lblen1, in.getNumColumns(), true);
if( in.getNumRows()>0 )
firstBlk = firstBlk.leftIndexingOperations(in, lblen1-in.getNumRows(), lblen1-1,
0, in.getNumColumns()-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
retVal.add(new Tuple2<>(firstIndex, firstBlk));
}
else {
// Since merge requires the dimensions matching, shifting = slicing + left indexing
MatrixBlock firstSlicedBlk = in.slice(0, cutAt-1);
MatrixBlock firstBlk = new MatrixBlock(lblen1, in.getNumColumns(), true);
firstBlk = firstBlk.leftIndexingOperations(firstSlicedBlk, _shiftBy, _blen-1, 0, in.getNumColumns()-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
MatrixBlock secondSlicedBlk = in.slice(cutAt, in.getNumRows()-1, 0, in.getNumColumns()-1, new MatrixBlock());
int lblen2 = UtilFunctions.computeBlockSize(_outlen, secondIndex.getRowIndex(), _blen);
MatrixBlock secondBlk = new MatrixBlock(lblen2, in.getNumColumns(), true);
secondBlk = secondBlk.leftIndexingOperations(secondSlicedBlk, 0, secondSlicedBlk.getNumRows()-1, 0, in.getNumColumns()-1, new MatrixBlock(), UpdateType.INPLACE_PINNED);
retVal.add(new Tuple2<>(firstIndex, firstBlk));
retVal.add(new Tuple2<>(secondIndex, secondBlk));
}
}
return retVal.iterator();
}
}
}