blob: a7f3f740444cf7fd2320c78a71af1fd1bc036025 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.compress;
import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
import org.apache.sysds.runtime.compress.utils.Bitmap;
import org.apache.sysds.runtime.compress.utils.BitmapLossy;
import org.apache.sysds.runtime.compress.utils.DblArray;
import org.apache.sysds.runtime.compress.utils.DblArrayIntListHashMap;
import org.apache.sysds.runtime.compress.utils.DoubleIntListHashMap;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
/**
* Static functions for encoding bitmaps in various ways.
*/
public class BitmapEncoder {
/**
* Generate uncompressed bitmaps for a set of columns in an uncompressed matrix block.
*
* @param colIndices Indexes (within the block) of the columns to extract
* @param rawBlock An uncompressed matrix block; can be dense or sparse
* @param compSettings The compression settings used for the compression.
* @return uncompressed bitmap representation of the columns
*/
public static AbstractBitmap extractBitmap(int[] colIndices, MatrixBlock rawBlock,
CompressionSettings compSettings) {
// note: no sparse column selection reader because low potential
// single column selection
Bitmap res = null;
if(colIndices.length == 1) {
res = extractBitmap(colIndices[0], rawBlock, compSettings);
}
// multiple column selection (general case)
else {
ReaderColumnSelection reader = null;
if(rawBlock.isInSparseFormat() && compSettings.transposeInput)
reader = new ReaderColumnSelectionSparse(rawBlock, colIndices, compSettings);
else
reader = new ReaderColumnSelectionDense(rawBlock, colIndices, compSettings);
res = extractBitmap(colIndices, rawBlock, reader);
}
if(compSettings.lossy) {
return BitmapLossy.makeBitmapLossy(res);
}
else {
return res;
}
}
/**
* Extract Bitmap from a single column. It will always skip all zero values. It also counts the instances of zero.
*
* @param colIndex The index of the column
* @param rawBlock The Raw matrix block (that can be transposed)
* @param compSettings The Compression settings used, in this instance to know if the raw block is transposed.
* @return Bitmap containing the Information of the column.
*/
private static Bitmap extractBitmap(int colIndex, MatrixBlock rawBlock, CompressionSettings compSettings) {
// probe map for distinct items (for value or value groups)
DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
// scan rows and probe/build distinct items
final int m = compSettings.transposeInput ? rawBlock.getNumColumns() : rawBlock.getNumRows();
int numZeros = 0;
if(rawBlock.isInSparseFormat() && compSettings.transposeInput) { // SPARSE and Transposed.
SparseBlock a = rawBlock.getSparseBlock();
if(a != null && !a.isEmpty(colIndex)) {
int apos = a.pos(colIndex);
int alen = a.size(colIndex);
numZeros = m - alen;
int[] aix = a.indexes(colIndex);
double[] avals = a.values(colIndex);
// IntArrayList lstPtr0 = new IntArrayList(); // for 0 values
// int last = -1;
// iterate over non-zero entries but fill in zeros
for(int j = apos; j < apos + alen; j++) {
// fill in zero values
// if(!skipZeros)
// for(int k = last + 1; k < aix[j]; k++)
// lstPtr0.appendValue(k);
// handle non-zero value
IntArrayList lstPtr = distinctVals.get(avals[j]);
if(lstPtr == null) {
lstPtr = new IntArrayList();
distinctVals.appendValue(avals[j], lstPtr);
}
lstPtr.appendValue(aix[j]);
// last = aix[j];
}
// fill in remaining zero values
// if(!skipZeros) {
// for(int k = last + 1; k < m; k++)
// lstPtr0.appendValue(k);
// if(lstPtr0.size() > 0)
// distinctVals.appendValue(0, lstPtr0);
// }
}
// else if(!skipZeros) { // full 0 column
// IntArrayList lstPtr = new IntArrayList();
// for(int i = 0; i < m; i++)
// lstPtr.appendValue(i);
// distinctVals.appendValue(0, lstPtr);
// }
}
else // GENERAL CASE
{
for(int i = 0; i < m; i++) {
double val = compSettings.transposeInput ? rawBlock.quickGetValue(colIndex, i) : rawBlock
.quickGetValue(i, colIndex);
if(val != 0) {
IntArrayList lstPtr = distinctVals.get(val);
if(lstPtr == null) {
lstPtr = new IntArrayList();
distinctVals.appendValue(val, lstPtr);
}
lstPtr.appendValue(i);
}
else {
numZeros++;
}
}
}
return Bitmap.makeBitmap(distinctVals, numZeros);
}
private static Bitmap extractBitmap(int[] colIndices, MatrixBlock rawBlock, ReaderColumnSelection rowReader) {
// probe map for distinct items (for value or value groups)
DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
// scan rows and probe/build distinct items
DblArray cellVals = null;
int zero = 0;
while((cellVals = rowReader.nextRow()) != null) {
IntArrayList lstPtr = distinctVals.get(cellVals);
if(lstPtr == null) {
// create new objects only on demand
lstPtr = new IntArrayList();
distinctVals.appendValue(new DblArray(cellVals), lstPtr);
}
zero += DblArray.isZero(cellVals) ? 1 : 0;
lstPtr.appendValue(rowReader.getCurrentRowIndex());
}
return Bitmap.makeBitmap(distinctVals, colIndices.length, zero);
}
}