blob: d876946be68746379145c4da28da9634665efeda [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.data;
import java.io.Serializable;
import java.util.Iterator;
import org.apache.sysds.runtime.matrix.data.IJV;
/**
* This SparseBlock is an abstraction for different sparse matrix formats.
* Since the design is a tradeoff between performance and generality, we
* restrict this abstraction to row-major sparse representations for now.
* All sparse matrix block operations are supposed to be implemented
* against this abstraction in order to enable variability/extensibility.
*
* Example sparse format that can be implemented efficiently include
* CSR, MCSR, and - with performance drawbacks - COO.
*
*/
public abstract class SparseBlock implements Serializable
{
private static final long serialVersionUID = -5008747088111141395L;
//internal configuration parameters for all sparse blocks
protected static final int INIT_CAPACITY = 4; //initial array capacity
protected static final double RESIZE_FACTOR1 = 2; //factor until reaching est nnz
protected static final double RESIZE_FACTOR2 = 1.1; //factor after reaching est nnz
public enum Type {
MCSR,
CSR,
COO,
}
////////////////////////
//basic allocation
/**
* Allocate the underlying data structure holding non-zero values
* of row r if necessary.
*
* @param r row index
*/
public abstract void allocate(int r);
/**
* Allocate the underlying data structure holding non-zero values
* of row r if necessary, w/ given size.
*
* @param r row index
* @param nnz number of non-zeros
*/
public abstract void allocate(int r, int nnz);
/**
* Allocate the underlying data structure holding non-zero values
* of row r w/ the specified estimated nnz and max nnz.
*
* @param r row index
* @param ennz estimated non-zeros
* @param maxnnz max non-zeros
*/
public abstract void allocate(int r, int ennz, int maxnnz);
/**
* Re-allocate physical row if physical size exceeds
* logical size plus resize factor.
*
* @param r row index
*/
public abstract void compact(int r);
////////////////////////
//obtain basic meta data
/**
* Get the number of rows in the sparse block.
*
* @return number of rows
*/
public abstract int numRows();
/**
* Indicates if the underlying implementation allows thread-safe row
* updates if concurrent threads update disjoint rows.
*
* @return true if thread-safe row updates
*/
public abstract boolean isThreadSafe();
/**
* Indicates if the underlying data structures returned by values
* and indexes are contiguous arrays, which can be exploited for
* more efficient operations.
*
* @return true if underlying data structures are contiguous arrays
*/
public abstract boolean isContiguous();
/**
* Indicates if all non-zero values are aligned with the given
* second sparse block instance, which can be exploited for
* more efficient operations. Two non-zeros are aligned if they
* have the same column index and reside in the same array position.
*
* @param that sparse block
* @return true if all non-zero values are aligned with the given second sparse block
*/
public boolean isAligned(SparseBlock that)
{
//step 1: cheap meta data comparisons
if( numRows() != that.numRows() ) //num rows check
return false;
//step 2: check column indexes per row
int rlen = numRows();
for( int i=0; i<rlen; i++ )
if( !isAligned(i, that) )
return false;
return true;
}
/**
* Indicates if all non-zero values of row r are aligned with
* the same row of the given second sparse block instance, which
* can be exploited for more efficient operations. Two non-zeros
* are aligned if they have the same column index and reside in
* the same array position.
*
* @param r row index starting at 0
* @param that sparse block
* @return true if all non-zero values of row r are aligned with the same row
* of the given second sparse block instance
*/
public boolean isAligned(int r, SparseBlock that)
{
//step 1: cheap meta data comparisons
if( size(r) != that.size(r) || pos(r) != that.pos(r) )
return false;
//step 2: check column indexes per row
if( !isEmpty(r) ) {
int alen = size(r);
int apos = pos(r);
int[] aix = indexes(r);
int[] bix = that.indexes(r);
for( int j=apos; j<apos+alen; j++ )
if( aix[j] != bix[j] )
return false;
}
return true;
}
/**
* Indicates if the underlying data structure for a given row
* is already allocated.
*
* @param r row index
* @return true if already allocated
*/
public abstract boolean isAllocated(int r);
/**
* Clears the sparse block by deleting non-zero values. After this call
* all size() calls are guaranteed to return 0.
*/
public abstract void reset();
/**
* Clears the sparse block by deleting non-zero values. After this call
* all size() calls are guaranteed to return 0.
*
* @param ennz estimated non-zeros
* @param maxnnz max non-zeros
*/
public abstract void reset(int ennz, int maxnnz);
/**
* Clears row r of the sparse block by deleting non-zero values.
* After this call size(r) is guaranteed to return 0.
*
* @param r row index
* @param ennz estimated non-zeros
* @param maxnnz max non-zeros
*/
public abstract void reset(int r, int ennz, int maxnnz);
/**
* Get the number of non-zero values in the sparse block.
*
* @return number of non-zero values in sparse block
*/
public abstract long size();
/**
* Get the number of non-zero values in row r.
*
* @param r row index starting at 0
* @return number of non-zero values in row r
*/
public abstract int size(int r);
/**
* Get the number of non-zeros values in the row range
* of [rl, ru).
*
* @param rl row lower index
* @param ru row upper index
* @return number of non-zero values in the row range
*/
public abstract long size(int rl, int ru);
/**
* Get the number of non-zeros values in the row and column
* range of [rl/cl, ru/cu);
*
* @param rl row lower index
* @param ru row upper index
* @param cl column lower index
* @param cu column upper index
* @return number of non-zero values in the row and column range
*/
public abstract long size(int rl, int ru, int cl, int cu);
/**
* Get information if row r is empty, i.e., does not contain non-zero
* values. Equivalent to size(r)==0. Users should do this check if
* it is unknown if the underlying row data structure is allocated.
*
* @param r row index starting at 0
* @return true if row does not contain non-zero values
*/
public abstract boolean isEmpty(int r);
/**
* Validate the correctness of the internal data structures of the different
* sparse block implementations.
*
* @param rlen number of rows
* @param clen number of columns
* @param nnz number of non zeros
* @param strict enforce optional properties
* @return true if the sparse block is valid wrt the corresponding format
* such as COO, CSR, MCSR.
*/
public abstract boolean checkValidity(int rlen, int clen, long nnz, boolean strict);
////////////////////////
//obtain indexes/values/positions
/**
* Get the sorted array of column indexes of all non-zero entries in
* row r. Note that - for flexibility of the implementing format - the
* returned array may be larger, where the range for row r is given by
* [pos(r),pos(r)+size(r)).
*
* @param r row index starting at 0
* @return sorted array of column indexes
*/
public abstract int[] indexes(int r);
/**
* Get the array of all non-zero entries in row r, sorted by their column
* indexes. Note that - for flexibility of the implementing format - the
* returned array may be larger, where the range for row r is given by
* [pos(r),pos(r)+size(r)).
*
* @param r row index starting at 0
* @return array of all non-zero entries in row r sorted by column indexes
*/
public abstract double[] values(int r);
/**
* Get the starting position of row r in the indexes/values arrays returned
* by indexes(r) and values(r).
*
* @param r row index starting at 0
* @return starting position of row r
*/
public abstract int pos(int r);
////////////////////////
//update operations
/**
* Set the value of a matrix cell (r,c). This might update an existing
* non-zero value, insert a new non-zero value, or delete a non-zero value.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @param v zero or non-zero value
* @return true, if number of non-zeros changed
*/
public abstract boolean set(int r, int c, double v);
/**
* Set the values of row r to the given sparse row. This might update
* existing non-zero values, insert a new row, or delete a row.
*
* NOTE: This method exists for incremental runtime integration and might
* be deleted in the future.
*
* @param r row index starting at 0
* @param row sparse row
* @param deep indicator to create deep copy of sparse row
*/
public abstract void set(int r, SparseRow row, boolean deep);
/**
* Add a value to a matrix cell (r,c). This might update an existing
* non-zero value, or insert a new non-zero value.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @param v zero or non-zero value
* @return true, if number of non-zeros changed
*/
public abstract boolean add(int r, int c, double v);
/**
* Append a value to the end of the physical representation. This should
* only be used for operations with sequential write pattern or if followed
* by a sort() operation. Note that this operation does not perform any
* matrix cell updates.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @param v zero or non-zero value
*/
public abstract void append(int r, int c, double v);
/**
* Sets a dense array of non-zeros values into the column range [cl,cu)
* in row r. The passed value array may be larger and the relevant range
* is given by [vix,vix+len).
*
* @param r row index starting at 0
* @param cl lower column index starting at 0
* @param cu upper column index starting at 0
* @param v value array
* @param vix start index in value array
* @param vlen number of relevant values
*/
public abstract void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen);
/**
* Sets a sparse array of non-zeros values and indexes into the column range [cl,cu)
* in row r. The passed value array may be larger.
*
* @param r row index starting at 0
* @param cl lower column index starting at 0
* @param cu upper column index starting at 0
* @param v value array
* @param vix column index array
* @param vpos start index in value and index arrays
* @param vlen number of relevant values
*/
public abstract void setIndexRange(int r, int cl, int cu, double[] v, int[] vix, int vpos, int vlen);
/**
* Deletes all non-zero values of the given column range [cl,cu) in row r.
*
* @param r row index starting at 0
* @param cl lower column index starting at 0
* @param cu upper column index starting at 0
*/
public abstract void deleteIndexRange(int r, int cl, int cu);
/**
* Sort all non-zero value/index pairs of the sparse block by row
* and column index.
*/
public abstract void sort();
/**
* Sort all non-zero value/index pairs of row r column index.
*
* @param r row index starting at 0
*/
public abstract void sort(int r);
////////////////////////
//search operations
/**
* Get value of matrix cell (r,c). In case of non existing values
* this call returns 0.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @return value of cell at position (r,c)
*/
public abstract double get(int r, int c);
/**
* Get values of row r in the format of a sparse row.
*
* NOTE: This method exists for incremental runtime integration and might
* be deleted in the future.
*
* @param r row index starting at 0
* @return values of row r as a sparse row
*/
public abstract SparseRow get(int r);
/**
* Get position of first column index lower than or equal column c
* in row r. The position is relative to the indexes/values arrays
* returned by indexes(r) and values(r). If no such value exists,
* this call returns -1.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @return position of the first column index lower than or equal to column c in row r
*/
public abstract int posFIndexLTE(int r, int c);
/**
* Get position of first column index greater than or equal column c
* in row r. The position is relative to the indexes/values arrays
* returned by indexes(r) and values(r). If no such value exists,
* this call returns -1.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @return position of the first column index greater than or equal to column c in row r
*/
public abstract int posFIndexGTE(int r, int c);
/**
* Get position of first column index greater than column c in row r.
* The position is relative to the indexes/values arrays returned by
* indexes(r) and values(r). If no such value exists, this call
* returns -1.
*
* @param r row index starting at 0
* @param c column index starting at 0
* @return position of the first column index greater than column c in row r
*/
public abstract int posFIndexGT(int r, int c);
////////////////////////
//iterators
/**
* Get a non-zero iterator over the entire sparse block. Note that
* the returned IJV object is reused across next calls and should
* be directly consumed or deep copied.
*
* @return IJV iterator
*/
public Iterator<IJV> getIterator() {
//default generic iterator, override if necessary
return new SparseBlockIterator(numRows());
}
/**
* Get a non-zero iterator over the partial sparse block [0,ru). Note
* that the returned IJV object is reused across next calls and should
* be directly consumed or deep copied.
*
* @param ru exclusive upper row index starting at 0
* @return IJV iterator
*/
public Iterator<IJV> getIterator(int ru) {
//default generic iterator, override if necessary
return new SparseBlockIterator(ru);
}
/**
* Get a non-zero iterator over the subblock [rl, ru). Note that
* the returned IJV object is reused across next calls and should
* be directly consumed or deep copied.
*
* @param rl inclusive lower row index starting at 0
* @param ru exclusive upper row index starting at 0
* @return IJV iterator
*/
public Iterator<IJV> getIterator(int rl, int ru) {
//default generic iterator, override if necessary
return new SparseBlockIterator(rl, Math.min(ru,numRows()));
}
@Override
public abstract String toString();
/**
* Default sparse block iterator implemented against the sparse block
* api in an implementation-agnostic manner.
*
*/
private class SparseBlockIterator implements Iterator<IJV>
{
private int _rlen = 0; //row upper
private int _curRow = -1; //current row
private int _curColIx = -1; //current col index pos
private int[] _curIndexes = null; //current col indexes
private double[] _curValues = null; //current col values
private boolean _noNext = false; //end indicator
private IJV retijv = new IJV(); //reuse output tuple
protected SparseBlockIterator(int ru) {
_rlen = ru;
_curRow = 0;
findNextNonZeroRow();
}
protected SparseBlockIterator(int rl, int ru) {
_rlen = ru;
_curRow = rl;
findNextNonZeroRow();
}
@Override
public boolean hasNext() {
return !_noNext;
}
@Override
public IJV next( ) {
retijv.set(_curRow, _curIndexes[_curColIx], _curValues[_curColIx]);
//NOTE: no preincrement on curcolix to avoid OpenJDK8 escape analysis bug, encountered
//with tests SparsityRecompileTest/SparsityFunctionRecompileTest on parfor local result merge
if( _curColIx < pos(_curRow)+size(_curRow)-1 )
_curColIx++;
else {
_curRow++;
findNextNonZeroRow();
}
return retijv;
}
@Override
public void remove() {
throw new RuntimeException("SparseBlockIterator is unsupported!");
}
/**
* Moves cursor to next non-zero value or indicates that no more
* values are available.
*/
private void findNextNonZeroRow() {
while( _curRow<_rlen && isEmpty(_curRow))
_curRow++;
if(_curRow >= _rlen)
_noNext = true;
else {
_curColIx = pos(_curRow);
_curIndexes = indexes(_curRow);
_curValues = values(_curRow);
}
}
}
}