blob: 540739a835b3f19ba35e50c72eeb1e0c08fc040a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.compress;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.compress.utils.ConverterUtils;
import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils;
import org.apache.sysml.runtime.functionobjects.Builtin;
import org.apache.sysml.runtime.functionobjects.KahanFunction;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.operators.ScalarOperator;
/**
* Class to encapsulate information about a column group that is encoded with
* simple lists of offsets for each set of distinct values.
*
*/
public class ColGroupOLE extends ColGroupOffset
{
private static final long serialVersionUID = -9157676271360528008L;
private static final Log LOG = LogFactory.getLog(ColGroupOLE.class.getName());
public ColGroupOLE() {
super();
}
/**
* Main constructor. Constructs and stores the necessary bitmaps.
*
* @param colIndices
* indices (within the block) of the columns included in this
* column
* @param numRows
* total number of rows in the parent block
* @param ubm
* Uncompressed bitmap representation of the block
*/
public ColGroupOLE(int[] colIndices, int numRows, UncompressedBitmap ubm)
{
super(colIndices, numRows, ubm);
// compress the bitmaps
final int numVals = ubm.getNumValues();
char[][] lbitmaps = new char[numVals][];
int totalLen = 0;
for( int i=0; i<numVals; i++ ) {
lbitmaps[i] = BitmapEncoder.genOffsetBitmap(
ubm.getOffsetsList(i).extractValues(), ubm.getNumOffsets(i));
totalLen += lbitmaps[i].length;
}
// compact bitmaps to linearized representation
createCompressedBitmaps(numVals, totalLen, lbitmaps);
if( LOW_LEVEL_OPT && CREATE_SKIPLIST
&& numRows > 2*BitmapEncoder.BITMAP_BLOCK_SZ )
{
int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
_skiplist = new int[numVals];
int rl = (getNumRows()/2/blksz)*blksz;
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
int bix = 0;
for( int i=0; i<rl && bix<blen; i+=blksz ) {
bix += _data[boff+bix] + 1;
}
_skiplist[k] = bix;
}
}
//debug output
double ucSize = MatrixBlock.estimateSizeDenseInMemory(numRows, colIndices.length);
if( estimateInMemorySize() > ucSize )
LOG.warn("OLE group larger than UC dense: "+estimateInMemorySize()+" "+ucSize);
}
public ColGroupOLE(int[] colIndices, int numRows, boolean zeros, double[] values, char[] bitmaps, int[] bitmapOffs) {
super(colIndices, numRows, zeros, values);
_data = bitmaps;
_ptr = bitmapOffs;
}
@Override
public CompressionType getCompType() {
return CompressionType.OLE_BITMAP;
}
@Override
public void decompressToBlock(MatrixBlock target, int rl, int ru)
{
if( LOW_LEVEL_OPT && getNumValues() > 1 )
{
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numCols = getNumCols();
final int numVals = getNumValues();
//cache blocking config and position array
int[] apos = skipScan(numVals, rl);
//cache conscious append via horizontal scans
for( int bi=rl; bi<ru; bi+=blksz ) {
for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
int boff = _ptr[k];
int blen = len(k);
int bix = apos[k];
if( bix >= blen )
continue;
int len = _data[boff+bix];
int pos = boff+bix+1;
for( int i=pos; i<pos+len; i++ )
for( int j=0, rix = bi+_data[i]; j<numCols; j++ )
if( _values[off+j]!=0 )
target.appendValue(rix, _colIndexes[j], _values[off+j]);
apos[k] += len + 1;
}
}
}
else
{
//call generic decompression with decoder
super.decompressToBlock(target, rl, ru);
}
}
@Override
public void decompressToBlock(MatrixBlock target, int[] colixTargets)
{
if( LOW_LEVEL_OPT && getNumValues() > 1 )
{
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numCols = getNumCols();
final int numVals = getNumValues();
final int n = getNumRows();
//cache blocking config and position array
int[] apos = new int[numVals];
int[] cix = new int[numCols];
//prepare target col indexes
for( int j=0; j<numCols; j++ )
cix[j] = colixTargets[_colIndexes[j]];
//cache conscious append via horizontal scans
for( int bi=0; bi<n; bi+=blksz ) {
for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
int boff = _ptr[k];
int blen = len(k);
int bix = apos[k];
if( bix >= blen )
continue;
int len = _data[boff+bix];
int pos = boff+bix+1;
for( int i=pos; i<pos+len; i++ )
for( int j=0, rix = bi+_data[i]; j<numCols; j++ )
if( _values[off+j]!=0 )
target.appendValue(rix, cix[j], _values[off+j]);
apos[k] += len + 1;
}
}
}
else
{
//call generic decompression with decoder
super.decompressToBlock(target, colixTargets);
}
}
@Override
public void decompressToBlock(MatrixBlock target, int colpos)
{
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numCols = getNumCols();
final int numVals = getNumValues();
final int n = getNumRows();
double[] c = target.getDenseBlock();
//cache blocking config and position array
int[] apos = allocIVector(numVals, true);
//cache conscious append via horizontal scans
int nnz = 0;
for( int bi=0; bi<n; bi+=blksz ) {
Arrays.fill(c, bi, Math.min(bi+blksz, n), 0);
for (int k = 0, off=0; k < numVals; k++, off+=numCols) {
int boff = _ptr[k];
int blen = len(k);
int bix = apos[k];
if( bix >= blen )
continue;
int len = _data[boff+bix];
int pos = boff+bix+1;
for( int i=pos; i<pos+len; i++ ) {
c[bi+_data[i]] = _values[off+colpos];
nnz++;
}
apos[k] += len + 1;
}
}
target.setNonZeros(nnz);
}
@Override
public int[] getCounts() {
final int numVals = getNumValues();
int[] counts = new int[numVals];
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
//iterate over bitmap blocks and count partial lengths
int count = 0;
for (int bix=0; bix < blen; bix+=_data[boff+bix]+1)
count += _data[boff+bix];
counts[k] = count;
}
return counts;
}
@Override
public int[] getCounts(int rl, int ru) {
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numVals = getNumValues();
int[] counts = new int[numVals];
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
int bix = skipScanVal(k, rl);
int count = 0;
for( int off=rl; bix < blen && off<ru; bix+=_data[boff+bix]+1, off+=blksz )
count += _data[boff+bix];
counts[k] = count;
}
return counts;
}
@Override
public ColGroup scalarOperation(ScalarOperator op)
throws DMLRuntimeException
{
double val0 = op.executeScalar(0);
//fast path: sparse-safe operations
// Note that bitmaps don't change and are shallow-copied
if( op.sparseSafe || val0==0 ) {
return new ColGroupOLE(_colIndexes, _numRows, _zeros,
applyScalarOp(op), _data, _ptr);
}
//slow path: sparse-unsafe operations (potentially create new bitmap)
//note: for efficiency, we currently don't drop values that become 0
boolean[] lind = computeZeroIndicatorVector();
int[] loff = computeOffsets(lind);
if( loff.length==0 ) { //empty offset list: go back to fast path
return new ColGroupOLE(_colIndexes, _numRows, true,
applyScalarOp(op), _data, _ptr);
}
double[] rvalues = applyScalarOp(op, val0, getNumCols());
char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff, loff.length);
char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length);
System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length);
int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length+1);
rbitmapOffs[rbitmapOffs.length-1] = rbitmaps.length;
return new ColGroupOLE(_colIndexes, _numRows, loff.length<_numRows,
rvalues, rbitmaps, rbitmapOffs);
}
@Override
public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru)
throws DMLRuntimeException
{
double[] b = ConverterUtils.getDenseVector(vector);
double[] c = result.getDenseBlock();
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numCols = getNumCols();
final int numVals = getNumValues();
//prepare reduced rhs w/ relevant values
double[] sb = new double[numCols];
for (int j = 0; j < numCols; j++) {
sb[j] = b[_colIndexes[j]];
}
if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
{
//since single segment scans already exceed typical L2 cache sizes
//and because there is some overhead associated with blocking, the
//best configuration aligns with L3 cache size (x*vcores*64K*8B < L3)
//x=4 leads to a good yet slightly conservative compromise for single-/
//multi-threaded and typical number of cores and L3 cache sizes
final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ;
//step 1: prepare position and value arrays
int[] apos = skipScan(numVals, rl);
double[] aval = preaggValues(numVals, sb);
//step 2: cache conscious matrix-vector via horizontal scans
for( int bi=rl; bi<ru; bi+=blksz2 )
{
int bimax = Math.min(bi+blksz2, ru);
//horizontal segment scan, incl pos maintenance
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
double val = aval[k];
int bix = apos[k];
for( int ii=bi; ii<bimax && bix<blen; ii+=blksz ) {
//prepare length, start, and end pos
int len = _data[boff+bix];
int pos = boff+bix+1;
//compute partial results
LinearAlgebraUtils.vectAdd(val, c, _data, pos, ii, len);
bix += len + 1;
}
apos[k] = bix;
}
}
}
else
{
//iterate over all values and their bitmaps
for (int k = 0; k < numVals; k++)
{
//prepare value-to-add for entire value bitmap
int boff = _ptr[k];
int blen = len(k);
double val = sumValues(k, sb);
//iterate over bitmap blocks and add values
if (val != 0) {
int bix = 0;
int off = 0;
int slen = -1;
//scan to beginning offset if necessary
if( rl > 0 ){
for (; bix<blen & off<rl; bix += slen+1, off += blksz) {
slen = _data[boff+bix];
}
}
//compute partial results
for (; bix<blen & off<ru; bix += slen + 1, off += blksz) {
slen = _data[boff+bix];
for (int blckIx = 1; blckIx <= slen; blckIx++) {
c[off + _data[boff+bix + blckIx]] += val;
}
}
}
}
}
}
@Override
public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result)
throws DMLRuntimeException
{
double[] a = ConverterUtils.getDenseVector(vector);
double[] c = result.getDenseBlock();
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numCols = getNumCols();
final int numVals = getNumValues();
final int n = getNumRows();
if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
{
//cache blocking config (see matrix-vector mult for explanation)
final int blksz2 = ColGroupOffset.READ_CACHE_BLKSZ;
//step 1: prepare position and value arrays
//current pos per OLs / output values
int[] apos = allocIVector(numVals, true);
double[] cvals = allocDVector(numVals, true);
//step 2: cache conscious matrix-vector via horizontal scans
for( int ai=0; ai<n; ai+=blksz2 )
{
int aimax = Math.min(ai+blksz2, n);
//horizontal segment scan, incl pos maintenance
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
int bix = apos[k];
double vsum = 0;
for( int ii=ai; ii<aimax && bix<blen; ii+=blksz ) {
//prepare length, start, and end pos
int len = _data[boff+bix];
int pos = boff+bix+1;
//iterate over bitmap blocks and compute partial results (a[i]*1)
vsum += LinearAlgebraUtils.vectSum(a, _data, ii, pos, len);
bix += len + 1;
}
apos[k] = bix;
cvals[k] += vsum;
}
}
//step 3: scale partial results by values and write to global output
for (int k = 0, valOff=0; k < numVals; k++, valOff+=numCols)
for( int j = 0; j < numCols; j++ )
c[ _colIndexes[j] ] += cvals[k] * _values[valOff+j];
}
else
{
//iterate over all values and their bitmaps
for (int k=0, valOff=0; k<numVals; k++, valOff+=numCols)
{
int boff = _ptr[k];
int blen = len(k);
//iterate over bitmap blocks and add partial results
double vsum = 0;
for (int bix=0, off=0; bix < blen; bix+=_data[boff+bix]+1, off+=blksz)
vsum += LinearAlgebraUtils.vectSum(a, _data, off, boff+bix+1, _data[boff+bix]);
//scale partial results by values and write results
for( int j = 0; j < numCols; j++ )
c[ _colIndexes[j] ] += vsum * _values[ valOff+j ];
}
}
}
@Override
public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result)
throws DMLRuntimeException
{
//note: this method is only applicable for numrows < blocksize
double[] c = result.getDenseBlock();
final int numCols = getNumCols();
final int numVals = getNumValues();
//iterate over all values and their bitmaps
for (int k=0, valOff=0; k<numVals; k++, valOff+=numCols) {
int boff = _ptr[k];
//iterate over bitmap blocks and add partial results
double vsum = 0;
for( int j = boff+1; j < boff+1+_data[boff]; j++ )
vsum += a.getData(_data[j]);
//scale partial results by values and write results
for( int j = 0; j < numCols; j++ )
c[ _colIndexes[j] ] += vsum * _values[ valOff+j ];
}
}
@Override
protected final void computeSum(MatrixBlock result, KahanFunction kplus)
{
KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
//iterate over all values and their bitmaps
final int numVals = getNumValues();
final int numCols = getNumCols();
for (int k = 0; k < numVals; k++)
{
int boff = _ptr[k];
int blen = len(k);
int valOff = k * numCols;
//iterate over bitmap blocks and count partial lengths
int count = 0;
for (int bix=0; bix < blen; bix+=_data[boff+bix]+1)
count += _data[boff+bix];
//scale counts by all values
for( int j = 0; j < numCols; j++ )
kplus.execute3(kbuff, _values[ valOff+j ], count);
}
result.quickSetValue(0, 0, kbuff._sum);
result.quickSetValue(0, 1, kbuff._correction);
}
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru)
{
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numVals = getNumValues();
double[] c = result.getDenseBlock();
if( ALLOW_CACHE_CONSCIOUS_ROWSUMS &&
LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz )
{
final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ/2;
//step 1: prepare position and value arrays
int[] apos = skipScan(numVals, rl);
double[] aval = sumAllValues(kplus, kbuff, false);
//step 2: cache conscious row sums via horizontal scans
for( int bi=rl; bi<ru; bi+=blksz2 )
{
int bimax = Math.min(bi+blksz2, ru);
//horizontal segment scan, incl pos maintenance
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
double val = aval[k];
int bix = apos[k];
for( int ii=bi; ii<bimax && bix<blen; ii+=blksz ) {
//prepare length, start, and end pos
int len = _data[boff+bix];
int pos = boff+bix+1;
//compute partial results
for (int i = 0; i < len; i++) {
int rix = ii + _data[pos + i];
kbuff.set(c[2*rix], c[2*rix+1]);
kplus2.execute2(kbuff, val);
c[2*rix] = kbuff._sum;
c[2*rix+1] = kbuff._correction;
}
bix += len + 1;
}
apos[k] = bix;
}
}
}
else
{
//iterate over all values and their bitmaps
for (int k = 0; k < numVals; k++)
{
//prepare value-to-add for entire value bitmap
int boff = _ptr[k];
int blen = len(k);
double val = sumValues(k, kplus, kbuff);
//iterate over bitmap blocks and add values
if (val != 0) {
int slen;
int bix = skipScanVal(k, rl);
for( int off=((rl+1)/blksz)*blksz; bix<blen && off<ru; bix+=slen+1, off+=blksz ) {
slen = _data[boff+bix];
for (int i = 1; i <= slen; i++) {
int rix = off + _data[boff+bix + i];
kbuff.set(c[2*rix], c[2*rix+1]);
kplus2.execute2(kbuff, val);
c[2*rix] = kbuff._sum;
c[2*rix+1] = kbuff._correction;
}
}
}
}
}
}
@Override
protected final void computeColSums(MatrixBlock result, KahanFunction kplus)
{
KahanObject kbuff = new KahanObject(0, 0);
//iterate over all values and their bitmaps
final int numVals = getNumValues();
final int numCols = getNumCols();
for (int k = 0; k < numVals; k++)
{
int boff = _ptr[k];
int blen = len(k);
int valOff = k * numCols;
//iterate over bitmap blocks and count partial lengths
int count = 0;
for (int bix=0; bix < blen; bix+=_data[boff+bix]+1)
count += _data[boff+bix];
//scale counts by all values
for( int j = 0; j < numCols; j++ ) {
kbuff.set(result.quickGetValue(0, _colIndexes[j]),result.quickGetValue(1, _colIndexes[j]));
kplus.execute3(kbuff, _values[ valOff+j ], count);
result.quickSetValue(0, _colIndexes[j], kbuff._sum);
result.quickSetValue(1, _colIndexes[j], kbuff._correction);
}
}
}
@Override
protected final void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru)
{
//NOTE: zeros handled once for all column groups outside
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numVals = getNumValues();
double[] c = result.getDenseBlock();
//iterate over all values and their bitmaps
for (int k = 0; k < numVals; k++)
{
//prepare value-to-add for entire value bitmap
int boff = _ptr[k];
int blen = len(k);
double val = mxxValues(k, builtin);
//iterate over bitmap blocks and add values
int slen;
int bix = skipScanVal(k, rl);
for( int off=bix*blksz; bix<blen && off<ru; bix+=slen+1, off+=blksz ) {
slen = _data[boff+bix];
for (int i = 1; i <= slen; i++) {
int rix = off + _data[boff+bix + i];
c[rix] = builtin.execute2(c[rix], val);
}
}
}
}
/**
* Utility function of sparse-unsafe operations.
*
* @return zero indicator vector
*/
@Override
protected boolean[] computeZeroIndicatorVector()
{
boolean[] ret = new boolean[_numRows];
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int numVals = getNumValues();
//initialize everything with zero
Arrays.fill(ret, true);
//iterate over all values and their bitmaps
for (int k = 0; k < numVals; k++)
{
//prepare value-to-add for entire value bitmap
int boff = _ptr[k];
int blen = len(k);
//iterate over bitmap blocks and add values
int off = 0;
int slen;
for( int bix=0; bix < blen; bix+=slen+1, off+=blksz) {
slen = _data[boff+bix];
for (int i = 1; i <= slen; i++) {
ret[off + _data[boff+bix + i]] &= false;
}
}
}
return ret;
}
@Override
protected void countNonZerosPerRow(int[] rnnz, int rl, int ru)
{
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ;
final int numVals = getNumValues();
final int numCols = getNumCols();
//current pos per OLs / output values
int[] apos = skipScan(numVals, rl);
//cache conscious count via horizontal scans
for( int bi=rl; bi<ru; bi+=blksz2 ) {
int bimax = Math.min(bi+blksz2, ru);
//iterate over all values and their bitmaps
for (int k = 0; k < numVals; k++) {
//prepare value-to-add for entire value bitmap
int boff = _ptr[k];
int blen = len(k);
int bix = apos[k];
//iterate over bitmap blocks and add values
for( int off=bi, slen=0; bix<blen && off<bimax; bix+=slen+1, off+=blksz ) {
slen = _data[boff+bix];
for (int blckIx = 1; blckIx <= slen; blckIx++) {
rnnz[off + _data[boff+bix + blckIx] - rl] += numCols;
}
}
apos[k] = bix;
}
}
}
/////////////////////////////////
// internal helper functions
/**
* Scans to given row_lower position by exploiting any existing
* skip list and scanning segment length fields. Returns array
* of positions for all values.
*
* @param numVals number of values
* @param rl row lower position
* @return array of positions for all values
*/
private int[] skipScan(int numVals, int rl) {
int[] ret = allocIVector(numVals, rl==0);
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
if( rl > 0 ) { //rl aligned with blksz
int rskip = (getNumRows()/2/blksz)*blksz;
for( int k = 0; k < numVals; k++ ) {
int boff = _ptr[k];
int blen = len(k);
int start = (rl>=rskip)?rskip:0;
int bix = (rl>=rskip)?_skiplist[k]:0;
for( int i=start; i<rl && bix<blen; i+=blksz ) {
bix += _data[boff+bix] + 1;
}
ret[k] = bix;
}
}
return ret;
}
private int skipScanVal(int k, int rl) {
final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
if( rl > 0 ) { //rl aligned with blksz
int rskip = (getNumRows()/2/blksz)*blksz;
int boff = _ptr[k];
int blen = len(k);
int start = (rl>=rskip)?rskip:0;
int bix = (rl>=rskip)?_skiplist[k]:0;
for( int i=start; i<rl && bix<blen; i+=blksz ) {
bix += _data[boff+bix] + 1;
}
return bix;
}
return 0;
}
@Override
public Iterator<Integer> getIterator(int k) {
return new OLEValueIterator(k, 0, getNumRows());
}
@Override
public Iterator<Integer> getIterator(int k, int rl, int ru) {
return new OLEValueIterator(k, rl, ru);
}
@Override
public ColGroupRowIterator getRowIterator(int rl, int ru) {
return new OLERowIterator(rl, ru);
}
private class OLEValueIterator implements Iterator<Integer>
{
private final int _ru;
private final int _boff;
private final int _blen;
private int _bix;
private int _start;
private int _slen;
private int _spos;
private int _rpos;
public OLEValueIterator(int k, int rl, int ru) {
_ru = ru;
_boff = _ptr[k];
_blen = len(k);
//initialize position via segment-aligned skip-scan
int lrl = rl - rl%BitmapEncoder.BITMAP_BLOCK_SZ;
_bix = skipScanVal(k, lrl);
_start = lrl;
//move position to actual rl boundary
if( _bix < _blen ) {
_slen = _data[_boff + _bix];
_spos = 0;
_rpos = _data[_boff + _bix + 1];
while( _rpos < rl )
nextRowOffset();
}
else {
_rpos = _ru;
}
}
@Override
public boolean hasNext() {
return (_rpos < _ru);
}
@Override
public Integer next() {
if( !hasNext() )
throw new RuntimeException("No more OLE entries.");
int ret = _rpos;
nextRowOffset();
return ret;
}
private void nextRowOffset() {
if( _spos+1 < _slen ) {
_spos++;
_rpos = _start + _data[_boff + _bix + _spos + 1];
}
else {
_start += BitmapEncoder.BITMAP_BLOCK_SZ;
_bix += _slen+1;
if( _bix < _blen ) {
_slen = _data[_boff + _bix];
_spos = 0;
_rpos = _start + _data[_boff + _bix + 1];
}
else {
_rpos = _ru;
}
}
}
}
private class OLERowIterator extends ColGroupRowIterator
{
private final int[] _apos;
private final int[] _vcodes;
public OLERowIterator(int rl, int ru) {
_apos = skipScan(getNumValues(), rl);
_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru-rl)];
Arrays.fill(_vcodes, -1); //initial reset
getNextSegment();
}
@Override
public void next(double[] buff, int rowIx, int segIx, boolean last) {
final int clen = _colIndexes.length;
final int vcode = _vcodes[segIx];
if( vcode >= 0 ) {
//copy entire value tuple if necessary
for(int j=0, off=vcode*clen; j<clen; j++)
buff[_colIndexes[j]] = _values[off+j];
//reset vcode to avoid scan on next segment
_vcodes[segIx] = -1;
}
if( segIx+1==BitmapEncoder.BITMAP_BLOCK_SZ && !last )
getNextSegment();
}
private void getNextSegment() {
//materialize value codes for entire segment in a
//single pass over all values (store value code by pos)
final int numVals = getNumValues();
for (int k = 0; k < numVals; k++) {
int boff = _ptr[k];
int blen = len(k);
int bix = _apos[k];
if( bix >= blen ) continue;
int slen = _data[boff+bix];
for(int i=0, off=boff+bix+1; i<slen; i++)
_vcodes[_data[off+i]] = k;
_apos[k] += slen+1;
}
}
}
}