| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.sysds.runtime.compress.lib; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.List; |
| import java.util.concurrent.Callable; |
| import java.util.concurrent.ExecutionException; |
| import java.util.concurrent.ExecutorService; |
| import java.util.concurrent.Future; |
| |
| import org.apache.commons.lang3.tuple.Pair; |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.sysds.runtime.DMLRuntimeException; |
| import org.apache.sysds.runtime.compress.CompressedMatrixBlock; |
| import org.apache.sysds.runtime.compress.CompressionSettings; |
| import org.apache.sysds.runtime.compress.colgroup.ColGroup; |
| import org.apache.sysds.runtime.compress.colgroup.ColGroupOLE; |
| import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed; |
| import org.apache.sysds.runtime.compress.colgroup.ColGroupValue; |
| import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils; |
| import org.apache.sysds.runtime.data.DenseBlock; |
| import org.apache.sysds.runtime.data.SparseBlock; |
| import org.apache.sysds.runtime.functionobjects.SwapIndex; |
| import org.apache.sysds.runtime.matrix.data.LibMatrixReorg; |
| import org.apache.sysds.runtime.matrix.data.MatrixBlock; |
| import org.apache.sysds.runtime.matrix.operators.ReorgOperator; |
| import org.apache.sysds.runtime.util.CommonThreadPool; |
| |
| public class LibLeftMultBy { |
| private static final Log LOG = LogFactory.getLog(LibLeftMultBy.class.getName()); |
| |
| private static ThreadLocal<double[]> memPoolOLE = new ThreadLocal<double[]>() { |
| @Override |
| protected double[] initialValue() { |
| return null; |
| } |
| }; |
| |
| public static MatrixBlock leftMultByMatrix(List<ColGroup> groups, MatrixBlock that, MatrixBlock ret, |
| boolean doTranspose, boolean allocTmp, int numCols, boolean overlapping, int k, Pair<Integer, int[]> v) { |
| int numRowsOutput = doTranspose ? that.getNumColumns() : that.getNumRows(); |
| if(ret == null) |
| ret = new MatrixBlock(numRowsOutput, numCols, false, numRowsOutput * numCols); |
| else if(!(ret.getNumColumns() == numCols && ret.getNumRows() == numRowsOutput && ret.isAllocated())) |
| ret.reset(numRowsOutput, numCols, false, numRowsOutput * numCols); |
| |
| if(that instanceof CompressedMatrixBlock) { |
| if(doTranspose) { |
| return leftMultByCompressedTransposedMatrix(groups, |
| (CompressedMatrixBlock) that, |
| ret, |
| k, |
| numCols, |
| v, |
| overlapping); |
| } |
| else { |
| LOG.error("Decompression Left side Matrix (Should not really happen)"); |
| that = ((CompressedMatrixBlock) that).decompress(k); |
| } |
| } |
| else if(doTranspose) { |
| ReorgOperator r_op = new ReorgOperator(SwapIndex.getSwapIndexFnObject(), k); |
| that = that.reorgOperations(r_op, new MatrixBlock(), 0, 0, 0); |
| } |
| |
| return leftMultByMatrix(groups, that, ret, k, numCols, v, overlapping); |
| |
| } |
| |
| public static void leftMultByTransposeSelf(List<ColGroup> groups, MatrixBlock result, int k, int numColumns, |
| Pair<Integer, int[]> v, boolean overlapping) { |
| |
| if(k <= 1) { |
| int cl = 0; |
| int cu = numColumns; |
| leftMultByTransposeSelfOverlapping(groups, result, v, cl, cu, overlapping); |
| } |
| else { |
| try { |
| ExecutorService pool = CommonThreadPool.get(k); |
| ArrayList<MatrixMultTransposeTaskOverlapping> tasks = new ArrayList<>(); |
| int blklen = (int) (Math.ceil((double) numColumns / k)); |
| for(int i = 0; i * blklen < numColumns; i++) |
| tasks.add(new MatrixMultTransposeTaskOverlapping(groups, result, i * blklen, |
| Math.min((i + 1) * blklen, numColumns), v, overlapping)); |
| List<Future<Object>> ret = pool.invokeAll(tasks); |
| for(Future<Object> tret : ret) |
| tret.get(); // check for errors |
| pool.shutdown(); |
| } |
| catch(InterruptedException | ExecutionException e) { |
| throw new DMLRuntimeException(e); |
| } |
| } |
| } |
| |
| public static MatrixBlock leftMultByCompressedTransposedMatrix(List<ColGroup> colGroups, CompressedMatrixBlock that, |
| MatrixBlock ret, int k, int numColumns, Pair<Integer, int[]> v, boolean overlapping) { |
| |
| if(ret == null) |
| ret = new MatrixBlock(that.getNumColumns(), numColumns, true, -1); |
| else |
| ret.reset(that.getNumColumns(), numColumns, true, -1); |
| ret.allocateDenseBlock(); |
| Pair<Integer, int[]> thatV = that.getMaxNumValues(); |
| LOG.error("Left mult by compressed transposed matrix: threads" + k); |
| if(k <= 1) { |
| int rl = 0; |
| int ru = that.getNumColumns(); |
| leftMultByTranspose(colGroups, that.getColGroups(), ret, v, thatV, rl, ru, overlapping, 1); |
| } |
| else { |
| try { |
| ExecutorService pool = CommonThreadPool.get(k); |
| ArrayList<leftMultByCompressedTransposedMatrixTask> tasks = new ArrayList<>(); |
| int blklen = (int) (Math.ceil((double) that.getNumColumns() / k)); |
| int numBlocks = that.getNumColumns() / blklen; |
| int numExtraThreads = k / numBlocks; |
| LOG.error("overlapping : " + overlapping); |
| // if(!overlapping) { |
| // for(int i = 0; i < that.getNumColumns(); i++) { |
| // tasks.add(new leftMultByCompressedTransposedMatrixTask(colGroups, that.getColGroups(), ret, v, |
| // thatV, i * blklen, Math.min((i + 1) * blklen, that.getNumColumns()), overlapping, )); |
| // } |
| // } |
| // else { |
| for(int i = 0; i * blklen < that.getNumColumns(); i++) |
| tasks.add(new leftMultByCompressedTransposedMatrixTask(colGroups, that.getColGroups(), ret, v, |
| thatV, i * blklen, Math.min((i + 1) * blklen, that.getNumColumns()), overlapping, |
| numExtraThreads)); |
| // } |
| List<Future<Object>> futures = pool.invokeAll(tasks); |
| LOG.error("tasks: " + futures.size() + " Each task has threads: " + numExtraThreads); |
| for(Future<Object> tret : futures) |
| tret.get(); // check for errors |
| pool.shutdown(); |
| } |
| catch(InterruptedException | ExecutionException e) { |
| throw new DMLRuntimeException(e); |
| } |
| } |
| return ret; |
| } |
| |
| private static MatrixBlock leftMultByMatrix(List<ColGroup> colGroups, MatrixBlock that, MatrixBlock ret, int k, |
| int numColumns, Pair<Integer, int[]> v, boolean overlapping) { |
| ret.allocateDenseBlock(); |
| if(that.isInSparseFormat()) { |
| ret = leftMultBySparseMatrix(colGroups, that, ret, k, numColumns, v, overlapping); |
| } |
| else { |
| ret = leftMultByDenseMatrix(colGroups, that, ret, k, numColumns, v, overlapping); |
| } |
| |
| ret.setNonZeros(ret.getNumColumns() * ret.getNumRows()); |
| return ret; |
| } |
| |
| private static MatrixBlock leftMultByDenseMatrix(List<ColGroup> colGroups, MatrixBlock that, MatrixBlock ret, int k, |
| int numColumns, Pair<Integer, int[]> v, boolean overlapping) { |
| DenseBlock db = that.getDenseBlock(); |
| if(db == null) |
| throw new DMLRuntimeException("Invalid LeftMult By Dense matrix, input matrix was sparse"); |
| |
| double[] retV = ret.getDenseBlockValues(); |
| double[] thatV; |
| int blockU; |
| int blockL = 0; |
| for(ColGroup grp : colGroups) |
| if(grp instanceof ColGroupUncompressed) |
| ((ColGroupUncompressed) grp).leftMultByMatrix(that, ret); |
| |
| for(int b = 0; b < db.numBlocks(); b++) { |
| int blockSize = db.blockSize(b); |
| blockU = Math.min(blockL + blockSize, ret.getNumRows()); |
| thatV = db.valuesAt(b); |
| |
| if(k == 1) { |
| // Pair<Integer, int[]> v = getMaxNumValues(colGroups); |
| |
| ColGroupValue.setupThreadLocalMemory(v.getLeft() + 1); |
| for(int j = 0; j < colGroups.size(); j++) { |
| colGroups.get(j).leftMultByMatrix(thatV, |
| retV, |
| colGroups.get(j).getValues(), |
| that.getNumRows(), |
| ret.getNumColumns(), |
| 0, |
| ret.getNumRows(), |
| 0); |
| } |
| ColGroupValue.cleanupThreadLocalMemory(); |
| } |
| else { |
| try { |
| ExecutorService pool = CommonThreadPool.get(k); |
| // compute remaining compressed column groups in parallel |
| ArrayList<LeftMatrixMatrixMultTask> tasks = new ArrayList<>(); |
| int rowBlockSize = 1; |
| for(int blo = blockL; blo < blockU; blo += rowBlockSize) { |
| tasks.add(new LeftMatrixMatrixMultTask(colGroups, thatV, retV, that.getNumRows(), numColumns, |
| blo, Math.min(blo + rowBlockSize, blockU), blo - blockL, v)); |
| } |
| |
| List<Future<Object>> futures = pool.invokeAll(tasks); |
| |
| pool.shutdown(); |
| for(Future<Object> future : futures) |
| future.get(); |
| } |
| catch(InterruptedException | ExecutionException e) { |
| throw new DMLRuntimeException(e); |
| } |
| } |
| blockL += blockSize; |
| } |
| return ret; |
| } |
| |
| private static MatrixBlock leftMultByVectorTranspose(List<ColGroup> colGroups, MatrixBlock vector, |
| MatrixBlock result, boolean doTranspose, boolean allocTmp, Pair<Integer, int[]> v, boolean overlap) { |
| |
| MatrixBlock rowVector = vector; |
| // Note that transpose here is a metadata operation since the input is a vector. |
| if(doTranspose) { |
| rowVector = new MatrixBlock(1, vector.getNumRows(), false); |
| LibMatrixReorg.transpose(vector, rowVector); |
| } |
| |
| // initialize and allocate the result |
| result.reset(); |
| result.allocateDenseBlock(); |
| |
| // setup memory pool for reuse |
| if(allocTmp) { |
| // Pair<Integer, int[]> v = getMaxNumValues(colGroups); |
| ColGroupValue.setupThreadLocalMemory(v.getLeft() + 1); // +1 for efficiency in DDC groups. |
| for(int i = 0; i < colGroups.size(); i++) { |
| colGroups.get(i).leftMultByRowVector(rowVector.getDenseBlockValues(), |
| result.getDenseBlockValues(), |
| v.getRight()[i]); |
| } |
| ColGroupValue.cleanupThreadLocalMemory(); |
| } |
| else { |
| |
| for(ColGroup grp : colGroups) { |
| grp.leftMultByRowVector(rowVector.getDenseBlockValues(), result.getDenseBlockValues(), -1); |
| } |
| } |
| |
| // delegate matrix-vector operation to each column group |
| |
| // post-processing |
| // if(allocTmp) |
| result.recomputeNonZeros(); |
| |
| return result; |
| } |
| |
| public static MatrixBlock leftMultByVectorTranspose(List<ColGroup> colGroups, MatrixBlock vector, |
| MatrixBlock result, boolean doTranspose, int k, Pair<Integer, int[]> v, boolean overlap) { |
| // transpose vector if required |
| MatrixBlock rowVector = vector; |
| if(doTranspose) { |
| rowVector = new MatrixBlock(1, vector.getNumRows(), false); |
| LibMatrixReorg.transpose(vector, rowVector); |
| } |
| |
| // initialize and allocate the result |
| result.reset(); |
| result.allocateDenseBlock(); |
| |
| // multi-threaded execution |
| try { |
| // compute uncompressed column group in parallel |
| // ColGroupUncompressed uc = getUncompressedColGroup(); |
| // if(uc != null) |
| // uc.leftMultByRowVector(rowVector, result, k); |
| |
| // compute remaining compressed column groups in parallel |
| ExecutorService pool = CommonThreadPool.get(Math.min(colGroups.size(), k)); |
| ArrayList<LeftMatrixVectorMultTask> tasks = new ArrayList<>(); |
| |
| // if(overlap){ |
| tasks.add(new LeftMatrixVectorMultTask(colGroups, rowVector, result, v)); |
| // } else{ |
| // ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning(colGroups, 4 * k, true); |
| // for(ArrayList<ColGroup> groups : grpParts) |
| // tasks.add(new LeftMatrixVectorMultTask(groups, rowVector, result, v)); |
| // } |
| |
| List<Future<Object>> ret = pool.invokeAll(tasks); |
| pool.shutdown(); |
| for(Future<Object> tmp : ret) |
| tmp.get(); |
| |
| } |
| catch(InterruptedException | ExecutionException e) { |
| LOG.error(e); |
| throw new DMLRuntimeException(e); |
| } |
| |
| // post-processing |
| result.recomputeNonZeros(); |
| |
| return result; |
| } |
| |
| private static MatrixBlock leftMultBySparseMatrix(List<ColGroup> colGroups, MatrixBlock that, MatrixBlock ret, |
| int k, int numColumns, Pair<Integer, int[]> v, boolean overlapping) { |
| |
| SparseBlock sb = that.getSparseBlock(); |
| if(sb == null) |
| throw new DMLRuntimeException("Invalid Left Mult by Sparse matrix, input matrix was dense"); |
| |
| for(ColGroup grp : colGroups) { |
| if(grp instanceof ColGroupUncompressed) |
| ((ColGroupUncompressed) grp).leftMultByMatrix(that, ret); |
| } |
| |
| double[][] materialized = new double[colGroups.size()][]; |
| boolean containsOLE = false; |
| for(int i = 0; i < colGroups.size(); i++) { |
| materialized[i] = colGroups.get(i).getValues(); |
| if(colGroups.get(i) instanceof ColGroupOLE) { |
| containsOLE = true; |
| } |
| } |
| if(k == 1) { |
| double[] tmpA = containsOLE ? new double[CompressionSettings.BITMAP_BLOCK_SZ * 2] : null; |
| |
| ColGroupValue.setupThreadLocalMemory(v.getLeft() + 1); |
| for(int j = 0; j < colGroups.size(); j++) { |
| for(int r = 0; r < that.getNumRows(); r++) { |
| if(!sb.isEmpty(r)) { |
| colGroups.get(j).leftMultBySparseMatrix(sb, |
| ret.getDenseBlockValues(), |
| materialized[j], |
| that.getNumRows(), |
| numColumns, |
| r, |
| tmpA); |
| } |
| } |
| } |
| ColGroupValue.cleanupThreadLocalMemory(); |
| } |
| else { |
| ExecutorService pool = CommonThreadPool.get(k); |
| ArrayList<LeftMatrixSparseMatrixMultTask> tasks = new ArrayList<>(); |
| try { |
| // long thatnnz = that.getNonZeros(); |
| // int rowBlockSize = that.getNumRows() / k; |
| int rowBlockSize = (int) Math.ceil(1000.0 / (that.getNonZeros() / that.getNumRows())); |
| // rowBlockSize = 1; |
| for(int r = 0; r * rowBlockSize < that.getNumRows(); r++) { |
| if(overlapping) { |
| tasks.add(new LeftMatrixSparseMatrixMultTask(colGroups, materialized, sb, |
| ret.getDenseBlockValues(), that.getNumRows(), numColumns, v, r * rowBlockSize, |
| Math.min((r + 1) * rowBlockSize, that.getNumRows()))); |
| } |
| else { |
| for(int i = 0; i < colGroups.size(); i++) { |
| tasks.add(new LeftMatrixSparseMatrixMultTask(colGroups.get(i), materialized, i, sb, |
| ret.getDenseBlockValues(), that.getNumRows(), numColumns, v, r * rowBlockSize, |
| Math.min((r + 1) * rowBlockSize, that.getNumRows()))); |
| } |
| } |
| } |
| |
| List<Future<Object>> futures = pool.invokeAll(tasks); |
| pool.shutdown(); |
| for(Future<Object> future : futures) |
| future.get(); |
| memPoolOLE.remove(); |
| } |
| catch(InterruptedException | ExecutionException e) { |
| throw new DMLRuntimeException(e); |
| } |
| } |
| |
| return ret; |
| |
| } |
| |
| // private static void leftMultByTransposeSelfNonOverlapping(List<ColGroup> groups, MatrixBlock result, |
| // Pair<Integer, int[]> v, int gl, int gu) { |
| |
| // // TODO exploit potential multiplication in compressed format. |
| |
| // final int numRows = groups.get(0).getNumRows(); |
| |
| // // preallocated dense tmp matrix blocks |
| // MatrixBlock lhs = new MatrixBlock(1, numRows, false); |
| // MatrixBlock tmpret = new MatrixBlock(1, result.getNumColumns(), false); |
| // lhs.allocateDenseBlock(); |
| // tmpret.allocateDenseBlock(); |
| |
| // // setup memory pool for reuse |
| // ColGroupValue.setupThreadLocalMemory(v.getLeft() + 1); |
| |
| // // approach: for each colgroup, extract uncompressed columns one at-a-time |
| // // vector-matrix multiplies against remaining col groups |
| // for(int i = gl; i < gu; i++) { |
| // // get current group and relevant col groups |
| // ColGroup group = groups.get(i); |
| // int[] ixgroup = group.getColIndices(); |
| // List<ColGroup> tmpList = groups.subList(i, groups.size()); |
| |
| // // if(group instanceof ColGroupDDC // single DDC group |
| // // && ixgroup.length == 1 && !containsUC && numRows < CompressionSettings.BITMAP_BLOCK_SZ) { |
| // // // compute vector-matrix partial result |
| // // leftMultByVectorTranspose(tmpList, (ColGroupDDC) group, tmpret); |
| |
| // // // write partial results (disjoint non-zeros) |
| // // LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, ixgroup[0]); |
| // // } |
| // // else { |
| // // for all uncompressed lhs columns vectors |
| // for(int j = 0; j < result.getNumColumns(); j++) { |
| // ColGroup.decompressToBlock(lhs, j, groups); |
| |
| // if(!lhs.isEmptyBlock(false)) { |
| // // tmpret.reset(); |
| // // compute vector-matrix partial result |
| // // leftMultByMatrix(groups,lhs, tmpret, false, true, 0, 0, overlapping, 1, v ); |
| // leftMultByVectorTranspose(groups, lhs, tmpret, false, true, v, overlapping); |
| // // LOG.error(tmpret); |
| |
| // // write partial results (disjoint non-zeros) |
| // LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, j); |
| // } |
| // lhs.reset(); |
| // // } |
| // } |
| // } |
| |
| // // post processing |
| // ColGroupValue.cleanupThreadLocalMemory(); |
| // } |
| |
| private static void leftMultByTransposeSelfOverlapping(List<ColGroup> groups, MatrixBlock result, |
| Pair<Integer, int[]> v, int cl, int cu, boolean overlapping) { |
| // It should be possible to get better performance exploiting if the matrix is not overlapping. |
| // TODO: exploit specfic column groups (DDC most likely) to gain better performance. |
| // Idea multiplying with one self simply use count of values, and then |
| // calculate : count * v^2 |
| |
| final int numRows = groups.get(0).getNumRows(); |
| |
| // preallocated dense tmp matrix blocks |
| MatrixBlock lhs = new MatrixBlock(1, numRows, false); |
| MatrixBlock tmpret = new MatrixBlock(1, result.getNumColumns(), false); |
| lhs.allocateDenseBlock(); |
| tmpret.allocateDenseBlock(); |
| |
| // setup memory pool for reuse |
| ColGroupValue.setupThreadLocalMemory(v.getLeft() + 1); |
| |
| for(int j = cl; j < cu; j++) { |
| ColGroup.decompressToBlock(lhs, j, groups); |
| if(!lhs.isEmptyBlock(false)) { |
| leftMultByVectorTranspose(groups, lhs, tmpret, false, true, v, overlapping); |
| LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, j); |
| } |
| lhs.reset(); |
| } |
| |
| // post processing |
| ColGroupValue.cleanupThreadLocalMemory(); |
| } |
| |
| private static void leftMultByTranspose(List<ColGroup> thisGroups, List<ColGroup> thatGroups, MatrixBlock result, |
| Pair<Integer, int[]> v, Pair<Integer, int[]> thatV, int rl, int ru, boolean overlapping, int k) { |
| |
| final int numRows = thisGroups.get(0).getNumRows(); |
| |
| // preallocated dense tmp matrix blocks |
| MatrixBlock lhs = new MatrixBlock(1, numRows, false); |
| MatrixBlock tmpret = new MatrixBlock(1, result.getNumColumns(), false); |
| lhs.allocateDenseBlock(); |
| tmpret.allocateDenseBlock(); |
| if(k > 1) |
| ColGroupValue.setupThreadLocalMemory(Math.max(v.getLeft(), thatV.getLeft()) + 1); |
| |
| ExecutorService pool = (k > 1) ? CommonThreadPool.get(k) : null; |
| ArrayList<leftMultByVectorTransposeTask> tasks = (k > 1) ? new ArrayList<>() : null; |
| for(int j = rl; j < ru; j++) { |
| ColGroup.decompressToBlock(lhs, j, thatGroups); |
| if(!lhs.isEmptyBlock(false)) { |
| if(!overlapping && k > 1) { |
| try { |
| int groupBatch = thisGroups.size() / k; |
| |
| for(int i = 0; i * groupBatch < thisGroups.size(); i++) { |
| tasks.add(new leftMultByVectorTransposeTask(thisGroups, lhs, tmpret, i * groupBatch, |
| Math.min(thisGroups.size(), (i + 1) * groupBatch), v)); |
| } |
| List<Future<Object>> futures = pool.invokeAll(tasks); |
| pool.shutdown(); |
| for(Future<Object> future : futures) |
| future.get(); |
| } |
| catch(InterruptedException | ExecutionException e) { |
| throw new DMLRuntimeException(e); |
| } |
| } |
| else { |
| for(ColGroup grp : thisGroups) { |
| grp.leftMultByRowVector(lhs.getDenseBlockValues(), tmpret.getDenseBlockValues(), -1); |
| } |
| } |
| for(int i = 0; i < tmpret.getNumColumns(); i++) { |
| result.appendValue(j, i, tmpret.quickGetValue(0, i)); |
| } |
| } |
| lhs.reset(); |
| } |
| |
| // post processing |
| ColGroupValue.cleanupThreadLocalMemory(); |
| |
| } |
| |
| private static class LeftMatrixVectorMultTask implements Callable<Object> { |
| private final List<ColGroup> _groups; |
| private final MatrixBlock _vect; |
| private final MatrixBlock _ret; |
| private final Pair<Integer, int[]> _v; |
| |
| protected LeftMatrixVectorMultTask(List<ColGroup> groups, MatrixBlock vect, MatrixBlock ret, |
| Pair<Integer, int[]> v) { |
| _groups = groups; |
| _vect = vect; |
| _ret = ret; |
| _v = v; |
| } |
| |
| @Override |
| public Object call() { |
| // setup memory pool for reuse |
| try { |
| ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); |
| for(int i = 0; i < _groups.size(); i++) { |
| _groups.get(i) |
| .leftMultByRowVector(_vect.getDenseBlockValues(), _ret.getDenseBlockValues(), _v.getRight()[i]); |
| } |
| |
| ColGroupValue.cleanupThreadLocalMemory(); |
| } |
| catch(Exception e) { |
| throw new DMLRuntimeException(e); |
| } |
| return null; |
| } |
| } |
| |
| private static class LeftMatrixMatrixMultTask implements Callable<Object> { |
| private final List<ColGroup> _group; |
| private final double[] _that; |
| private final double[] _ret; |
| private final int _numRows; |
| private final int _numCols; |
| private final int _rl; |
| private final int _ru; |
| private final int _vOff; |
| private final Pair<Integer, int[]> _v; |
| |
| protected LeftMatrixMatrixMultTask(List<ColGroup> group, double[] that, double[] ret, int numRows, int numCols, |
| int rl, int ru, int vOff, Pair<Integer, int[]> v) { |
| _group = group; |
| _that = that; |
| _ret = ret; |
| _numRows = numRows; |
| _numCols = numCols; |
| _rl = rl; |
| _ru = ru; |
| _vOff = vOff; |
| _v = v; |
| } |
| |
| @Override |
| public Object call() { |
| // setup memory pool for reuse |
| |
| double[][] materialized = new double[_group.size()][]; |
| for(int i = 0; i < _group.size(); i++) { |
| materialized[i] = _group.get(i).getValues(); |
| } |
| // Pair<Integer, int[]> v = getMaxNumValues(_group); |
| try { |
| ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); |
| for(int j = 0; j < _group.size(); j++) { |
| _group.get(j).leftMultByMatrix(_that, _ret, materialized[j], _numRows, _numCols, _rl, _ru, _vOff); |
| } |
| ColGroupValue.cleanupThreadLocalMemory(); |
| |
| } |
| catch(Exception e) { |
| throw new DMLRuntimeException(e); |
| } |
| return null; |
| } |
| } |
| |
| private static class LeftMatrixSparseMatrixMultTask implements Callable<Object> { |
| private final List<ColGroup> _groups; |
| private final ColGroup _group; |
| private final int _i; // Used to identify the index for the materialized values. |
| private final SparseBlock _that; |
| private final double[] _ret; |
| private final int _numRows; |
| private final int _numCols; |
| private final Pair<Integer, int[]> _v; |
| private final double[][] _materialized; |
| private final int _rl; |
| private final int _ru; |
| |
| protected LeftMatrixSparseMatrixMultTask(List<ColGroup> group, double[][] materialized, SparseBlock that, |
| double[] ret, int numRows, int numCols, Pair<Integer, int[]> v, int rl, int ru) { |
| _groups = group; |
| _group = null; |
| _i = -1; |
| _materialized = materialized; |
| _that = that; |
| _ret = ret; |
| _numRows = numRows; |
| _numCols = numCols; |
| _v = v; |
| _rl = rl; |
| _ru = ru; |
| } |
| |
| protected LeftMatrixSparseMatrixMultTask(ColGroup group, double[][] materialized, int i, SparseBlock that, |
| double[] ret, int numRows, int numCols, Pair<Integer, int[]> v, int rl, int ru) { |
| _groups = null; |
| _group = group; |
| _i = i; |
| _materialized = materialized; |
| _that = that; |
| _ret = ret; |
| _numRows = numRows; |
| _numCols = numCols; |
| _v = v; |
| _rl = rl; |
| _ru = ru; |
| } |
| |
| @Override |
| public Object call() { |
| // Temporary Array to store 2 * block size in |
| double[] tmpA = memPoolOLE.get(); |
| if(tmpA == null) { |
| if(_groups != null) { |
| tmpA = new double[Math.min(CompressionSettings.BITMAP_BLOCK_SZ * 2, _groups.get(0).getNumRows())]; |
| } |
| else { |
| tmpA = new double[Math.min(CompressionSettings.BITMAP_BLOCK_SZ * 2, _group.getNumRows())]; |
| } |
| } |
| else { |
| Arrays.fill(tmpA, 0.0); |
| } |
| |
| ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); |
| try { |
| if(_groups != null) { |
| for(int j = 0; j < _groups.size(); j++) { |
| double[] materializedV = _materialized[j]; |
| for(int r = _rl; r < _ru; r++) { |
| if(!_that.isEmpty(r)) { |
| // LOG.error(_that.get(r)); |
| // _v.getRight()[j], |
| _groups.get(j) |
| .leftMultBySparseMatrix(_that, _ret, materializedV, _numRows, _numCols, r, tmpA); |
| // Arrays.fill(tmpA, 0.0); |
| } |
| } |
| } |
| } |
| else if(_group != null) { |
| for(int r = _rl; r < _ru; r++) { |
| if(!_that.isEmpty(r)) { |
| // _v.getRight()[0], |
| _group.leftMultBySparseMatrix(_that, _ret, _materialized[_i], _numRows, _numCols, r, tmpA); |
| // Arrays.fill(tmpA, 0.0); |
| } |
| } |
| } |
| } |
| catch(Exception e) { |
| e.printStackTrace(); |
| throw new DMLRuntimeException(e); |
| } |
| ColGroupValue.cleanupThreadLocalMemory(); |
| return null; |
| } |
| } |
| |
| // private static class MatrixMultTransposeTaskNonOverlapping implements Callable<Object> { |
| // private final List<ColGroup> _groups; |
| // private final MatrixBlock _ret; |
| // private final int _gl; |
| // private final int _gu; |
| // private final Pair<Integer, int[]> _v; |
| |
| // protected MatrixMultTransposeTaskNonOverlapping(List<ColGroup> groups, MatrixBlock ret, int gl, int gu, |
| // Pair<Integer, int[]> v, boolean overlapping) { |
| // _groups = groups; |
| // _ret = ret; |
| // _gl = gl; |
| // _gu = gu; |
| // _v = v; |
| // } |
| |
| // @Override |
| // public Object call() { |
| // leftMultByTransposeSelfNonOverlapping(_groups, _ret, _v, _gl, _gu); |
| // return null; |
| // } |
| // } |
| |
| private static class MatrixMultTransposeTaskOverlapping implements Callable<Object> { |
| private final List<ColGroup> _groups; |
| private final MatrixBlock _ret; |
| private final int _gl; |
| private final int _gu; |
| private final Pair<Integer, int[]> _v; |
| private final boolean _overlapping; |
| |
| protected MatrixMultTransposeTaskOverlapping(List<ColGroup> groups, MatrixBlock ret, int gl, int gu, |
| Pair<Integer, int[]> v, boolean overlapping) { |
| _groups = groups; |
| _ret = ret; |
| _gl = gl; |
| _gu = gu; |
| _v = v; |
| _overlapping = overlapping; |
| } |
| |
| @Override |
| public Object call() { |
| leftMultByTransposeSelfOverlapping(_groups, _ret, _v, _gl, _gu, _overlapping); |
| return null; |
| } |
| } |
| |
| private static class leftMultByCompressedTransposedMatrixTask implements Callable<Object> { |
| private final List<ColGroup> _groups; |
| private final List<ColGroup> _thatGroups; |
| private final MatrixBlock _ret; |
| private final int _rl; |
| private final int _ru; |
| private final Pair<Integer, int[]> _v; |
| private final Pair<Integer, int[]> _thatV; |
| private final boolean _overlapping; |
| private final int _extraThreads; |
| |
| protected leftMultByCompressedTransposedMatrixTask(List<ColGroup> thisGroups, List<ColGroup> thatGroups, |
| MatrixBlock ret, Pair<Integer, int[]> v, Pair<Integer, int[]> thatV, int rl, int ru, boolean overlapping, |
| int extraThreads) { |
| _groups = thisGroups; |
| _thatGroups = thatGroups; |
| _ret = ret; |
| _rl = rl; |
| _ru = ru; |
| _v = v; |
| _thatV = thatV; |
| _overlapping = overlapping; |
| _extraThreads = extraThreads; |
| } |
| |
| @Override |
| public Object call() { |
| leftMultByTranspose(_groups, _thatGroups, _ret, _v, _thatV, _rl, _ru, _overlapping, _extraThreads); |
| return null; |
| } |
| } |
| |
| private static class leftMultByVectorTransposeTask implements Callable<Object> { |
| private final List<ColGroup> _grps; |
| private final MatrixBlock _rowVector; |
| private final MatrixBlock _result; |
| private final int _gl; |
| private final int _gu; |
| private final Pair<Integer, int[]> _v; |
| |
| protected leftMultByVectorTransposeTask(List<ColGroup> grps, MatrixBlock rowVector, MatrixBlock result, int gl, |
| int gu, Pair<Integer, int[]> v) { |
| _grps = grps; |
| _rowVector = rowVector; |
| _result = result; |
| _gl = gl; |
| _gu = gu; |
| _v = v; |
| } |
| |
| @Override |
| public Object call() { |
| ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); |
| for(int i = _gl; i < _gu; i++) { |
| _grps.get(i).leftMultByRowVector(_rowVector.getDenseBlockValues(), _result.getDenseBlockValues(), -1); |
| } |
| ColGroupValue.cleanupThreadLocalMemory(); |
| return null; |
| } |
| } |
| } |