| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.sysds.runtime.compress.colgroup; |
| |
| import java.io.DataInput; |
| import java.io.DataOutput; |
| import java.io.IOException; |
| import java.util.Arrays; |
| |
| import org.apache.commons.lang.NotImplementedException; |
| import org.apache.sysds.runtime.compress.DMLCompressionException; |
| import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; |
| import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; |
| import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; |
| import org.apache.sysds.runtime.compress.colgroup.dictionary.MatrixBlockDictionary; |
| import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory; |
| import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex; |
| import org.apache.sysds.runtime.compress.colgroup.mapping.MapToZero; |
| import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; |
| import org.apache.sysds.runtime.compress.colgroup.offset.AOffset; |
| import org.apache.sysds.runtime.compress.colgroup.offset.AOffset.OffsetSliceInfo; |
| import org.apache.sysds.runtime.compress.colgroup.offset.AOffsetIterator; |
| import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory; |
| import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme; |
| import org.apache.sysds.runtime.compress.cost.ComputationCostEstimator; |
| import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; |
| import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory; |
| import org.apache.sysds.runtime.compress.estim.encoding.IEncode; |
| import org.apache.sysds.runtime.data.DenseBlock; |
| import org.apache.sysds.runtime.data.SparseBlock; |
| import org.apache.sysds.runtime.functionobjects.Builtin; |
| import org.apache.sysds.runtime.matrix.data.MatrixBlock; |
| import org.apache.sysds.runtime.matrix.operators.BinaryOperator; |
| import org.apache.sysds.runtime.matrix.operators.ScalarOperator; |
| import org.apache.sysds.runtime.matrix.operators.UnaryOperator; |
| |
| /** |
| * Column group that sparsely encodes the dictionary values. The idea is that all values is encoded with indexes except |
| * the most common one. the most common one can be inferred by not being included in the indexes. If the values are very |
| * sparse then the most common one is zero. |
| * |
| * This column group is handy in cases where sparse unsafe operations is executed on very sparse columns. Then the zeros |
| * would be materialized in the group without any overhead. |
| */ |
| public class ColGroupSDCSingleZeros extends ASDCZero { |
| private static final long serialVersionUID = 8033235615964315078L; |
| |
| private ColGroupSDCSingleZeros(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, |
| int[] cachedCounts) { |
| super(colIndices, numRows, dict, offsets, cachedCounts); |
| if(offsets.getSize() * 2 > numRows + 2) |
| throw new DMLCompressionException("Wrong direction of SDCSingleZero compression should be other way " + numRows |
| + " vs " + _indexes + "\n" + this); |
| } |
| |
| public static AColGroup create(IColIndex colIndices, int numRows, ADictionary dict, AOffset offsets, |
| int[] cachedCounts) { |
| if(dict == null) |
| return new ColGroupEmpty(colIndices); |
| else if(offsets.getSize() * 2 > numRows + 2) { |
| AOffset rev = AOffset.reverse(numRows, offsets); |
| ADictionary empty = MatrixBlockDictionary.create(new MatrixBlock(1, colIndices.size(), true)); |
| return ColGroupSDCSingle.create(colIndices, numRows, empty, dict.getValues(), rev, null); |
| } |
| else |
| return new ColGroupSDCSingleZeros(colIndices, numRows, dict, offsets, cachedCounts); |
| } |
| |
| @Override |
| public CompressionType getCompType() { |
| return CompressionType.SDC; |
| } |
| |
| @Override |
| public ColGroupType getColGroupType() { |
| return ColGroupType.SDCSingleZeros; |
| } |
| |
| @Override |
| protected void decompressToDenseBlockDenseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, |
| double[] values) { |
| final AIterator it = _indexes.getIterator(rl); |
| if(it == null) |
| return; |
| else if(it.value() >= ru) |
| return; |
| // _indexes.cacheIterator(it, ru); |
| else { |
| decompressToDenseBlockDenseDictionaryWithProvidedIterator(db, rl, ru, offR, offC, values, it); |
| // _indexes.cacheIterator(it, ru); |
| } |
| } |
| |
| @Override |
| public void decompressToDenseBlockDenseDictionaryWithProvidedIterator(DenseBlock db, int rl, int ru, int offR, |
| int offC, double[] values, AIterator it) { |
| final int last = _indexes.getOffsetToLast(); |
| if(it == null || it.value() >= ru || rl > last) |
| return; |
| else if(ru > _indexes.getOffsetToLast()) |
| decompressToDenseBlockDenseDictionaryPost(db, rl, ru, offR, offC, values, it); |
| else { |
| if(_colIndexes.size() == 1 && db.getDim(1) == 1) |
| decompressToDenseBlockDenseDictionaryPreSingleColOutContiguous(db, rl, ru, offR, offC, values[0], it); |
| else |
| decompressToDenseBlockDenseDictionaryPre(db, rl, ru, offR, offC, values, it); |
| } |
| } |
| |
| private void decompressToDenseBlockDenseDictionaryPost(DenseBlock db, int rl, int ru, int offR, int offC, |
| double[] values, AIterator it) { |
| final int maxOff = _indexes.getOffsetToLast(); |
| final int nCol = _colIndexes.size(); |
| int row = offR + it.value(); |
| double[] c = db.values(row); |
| int off = db.pos(row); |
| for(int j = 0; j < nCol; j++) |
| c[off + _colIndexes.get(j) + offC] += values[j]; |
| while(it.value() < maxOff) { |
| it.next(); |
| row = offR + it.value(); |
| c = db.values(row); |
| off = db.pos(row); |
| for(int j = 0; j < nCol; j++) |
| c[off + _colIndexes.get(j) + offC] += values[j]; |
| |
| } |
| } |
| |
| private void decompressToDenseBlockDenseDictionaryPreSingleColOutContiguous(DenseBlock db, int rl, int ru, int offR, |
| int offC, double v, AIterator it) { |
| final double[] c = db.values(0); |
| int r = it.value(); |
| while(r < ru) { |
| c[offR + r] += v; |
| r = it.next(); |
| } |
| } |
| |
| private void decompressToDenseBlockDenseDictionaryPre(DenseBlock db, int rl, int ru, int offR, int offC, |
| double[] values, AIterator it) { |
| final int nCol = _colIndexes.size(); |
| int r = it.value(); |
| while(r < ru) { |
| final int row = offR + r; |
| final double[] c = db.values(row); |
| final int off = db.pos(row); |
| for(int j = 0; j < nCol; j++) |
| c[off + _colIndexes.get(j) + offC] += values[j]; |
| |
| r = it.next(); |
| } |
| } |
| |
| @Override |
| protected void decompressToDenseBlockSparseDictionary(DenseBlock db, int rl, int ru, int offR, int offC, |
| SparseBlock sb) { |
| final AIterator it = _indexes.getIterator(rl); |
| if(it == null) |
| return; |
| else if(it.value() >= ru) |
| _indexes.cacheIterator(it, ru); |
| |
| final int last = _indexes.getOffsetToLast(); |
| if(ru > last) |
| decompressToDenseBlockSparseDictionaryPost(db, rl, ru, offR, offC, sb, it, last); |
| else |
| decompressToDenseBlockSparseDictionaryPre(db, rl, ru, offR, offC, sb, it); |
| |
| } |
| |
| private final void decompressToDenseBlockSparseDictionaryPost(DenseBlock db, int rl, int ru, int offR, int offC, |
| SparseBlock sb, AIterator it, int last) { |
| final int apos = sb.pos(0); |
| final int alen = sb.size(0) + apos; |
| final double[] avals = sb.values(0); |
| final int[] aix = sb.indexes(0); |
| while(true) { |
| final int idx = offR + it.value(); |
| final double[] c = db.values(idx); |
| |
| final int off = db.pos(idx) + offC; |
| for(int j = apos; j < alen; j++) |
| c[off + _colIndexes.get(aix[j])] += avals[j]; |
| if(it.value() == last) |
| return; |
| it.next(); |
| } |
| } |
| |
| private final void decompressToDenseBlockSparseDictionaryPre(DenseBlock db, int rl, int ru, int offR, int offC, |
| SparseBlock sb, AIterator it) { |
| final int apos = sb.pos(0); |
| final int alen = sb.size(0) + apos; |
| final int[] aix = sb.indexes(0); |
| final double[] avals = sb.values(0); |
| while(it.isNotOver(ru)) { |
| final int row = offR + it.value(); |
| final double[] c = db.values(row); |
| final int off = db.pos(row); |
| for(int j = apos; j < alen; j++) |
| c[off + _colIndexes.get(aix[j]) + offC] += avals[j]; |
| it.next(); |
| } |
| _indexes.cacheIterator(it, ru); |
| } |
| |
| @Override |
| protected void decompressToSparseBlockSparseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, |
| SparseBlock sb) { |
| final AIterator it = _indexes.getIterator(rl); |
| final int last = _indexes.getOffsetToLast(); |
| if(it == null) |
| return; |
| else if(it.value() >= ru) |
| _indexes.cacheIterator(it, ru); |
| else if(ru > last) { |
| final int apos = sb.pos(0); |
| final int alen = sb.size(0) + apos; |
| final int[] aix = sb.indexes(0); |
| final double[] avals = sb.values(0); |
| while(it.value() < last) { |
| final int row = offR + it.value(); |
| for(int j = apos; j < alen; j++) |
| ret.append(row, _colIndexes.get(aix[j]) + offC, avals[j]); |
| it.next(); |
| } |
| final int row = offR + it.value(); |
| for(int j = apos; j < alen; j++) |
| ret.append(row, _colIndexes.get(aix[j]) + offC, avals[j]); |
| } |
| else { |
| final int apos = sb.pos(0); |
| final int alen = sb.size(0) + apos; |
| final int[] aix = sb.indexes(0); |
| final double[] avals = sb.values(0); |
| while(it.isNotOver(ru)) { |
| final int row = offR + it.value(); |
| for(int j = apos; j < alen; j++) |
| ret.append(row, _colIndexes.get(aix[j]) + offC, avals[j]); |
| |
| it.next(); |
| } |
| _indexes.cacheIterator(it, ru); |
| } |
| } |
| |
| @Override |
| protected void decompressToSparseBlockDenseDictionary(SparseBlock ret, int rl, int ru, int offR, int offC, |
| double[] values) { |
| final AIterator it = _indexes.getIterator(rl); |
| if(it == null) |
| return; |
| else if(it.value() >= ru) |
| _indexes.cacheIterator(it, ru); |
| else if(ru > _indexes.getOffsetToLast()) { |
| final int nCol = _colIndexes.size(); |
| final int lastOff = _indexes.getOffsetToLast(); |
| int row = offR + it.value(); |
| for(int j = 0; j < nCol; j++) |
| ret.append(row, _colIndexes.get(j) + offC, values[j]); |
| while(it.value() < lastOff) { |
| it.next(); |
| row = offR + it.value(); |
| for(int j = 0; j < nCol; j++) |
| ret.append(row, _colIndexes.get(j) + offC, values[j]); |
| } |
| } |
| else { |
| final int nCol = _colIndexes.size(); |
| while(it.isNotOver(ru)) { |
| final int row = offR + it.value(); |
| for(int j = 0; j < nCol; j++) |
| ret.append(row, _colIndexes.get(j) + offC, values[j]); |
| |
| it.next(); |
| } |
| _indexes.cacheIterator(it, ru); |
| } |
| } |
| |
| @Override |
| public double getIdx(int r, int colIdx) { |
| final AIterator it = _indexes.getIterator(r); |
| if(it == null || it.value() != r) |
| return 0; |
| return _dict.getValue(colIdx); |
| } |
| |
| @Override |
| protected void computeRowSums(double[] c, int rl, int ru, double[] preAgg) { |
| computeRowSum(c, rl, ru, preAgg[0]); |
| } |
| |
| protected void computeRowSum(double[] c, int rl, int ru, double def) { |
| final AIterator it = _indexes.getIterator(rl); |
| if(it == null) |
| return; |
| else if(it.value() > ru) |
| _indexes.cacheIterator(it, ru); |
| else if(ru > _indexes.getOffsetToLast()) { |
| final int maxOff = _indexes.getOffsetToLast(); |
| while(true) { |
| c[it.value()] += def; |
| if(it.value() == maxOff) |
| break; |
| it.next(); |
| } |
| } |
| else { |
| while(it.isNotOver(ru)) { |
| c[it.value()] += def; |
| it.next(); |
| } |
| _indexes.cacheIterator(it, ru); |
| } |
| } |
| |
| @Override |
| protected void computeRowMxx(double[] c, Builtin builtin, int rl, int ru, double[] preAgg) { |
| ColGroupSDCSingle.computeRowMxx(c, builtin, rl, ru, _indexes, _numRows, 0, preAgg[0]); |
| } |
| |
| @Override |
| protected void computeRowProduct(double[] c, int rl, int ru, double[] preAgg) { |
| ColGroupSDCSingle.computeRowProduct(c, rl, ru, _indexes, _numRows, 0, preAgg[0]); |
| } |
| |
| @Override |
| public int[] getCounts(int[] counts) { |
| counts[0] = _indexes.getSize(); |
| return counts; |
| } |
| |
| @Override |
| protected void multiplyScalar(double v, double[] resV, int offRet, AIterator it) { |
| _dict.multiplyScalar(v, resV, offRet, 0, _colIndexes); |
| } |
| |
| @Override |
| public void preAggregateDense(MatrixBlock m, double[] preAgg, int rl, int ru, int cl, int cu) { |
| if(!m.getDenseBlock().isContiguous()) |
| throw new NotImplementedException("Not implemented support for preAggregate non contiguous dense matrix"); |
| final AIterator it = _indexes.getIterator(cl); |
| final double[] vals = m.getDenseBlockValues(); |
| final int nCol = m.getNumColumns(); |
| if(it == null) |
| return; |
| else if(it.value() > cu) |
| _indexes.cacheIterator(it, cu); |
| else if(cu < _indexes.getOffsetToLast() + 1) { |
| while(it.value() < cu) { |
| final int start = it.value() + nCol * rl; |
| final int end = it.value() + nCol * ru; |
| for(int offOut = 0, off = start; off < end; offOut++, off += nCol) |
| preAgg[offOut] += vals[off]; |
| it.next(); |
| } |
| _indexes.cacheIterator(it, cu); |
| } |
| else { |
| int of = it.value(); |
| int start = of + nCol * rl; |
| int end = of + nCol * ru; |
| for(int offOut = 0, off = start; off < end; offOut++, off += nCol) |
| preAgg[offOut] += vals[off]; |
| while(of < _indexes.getOffsetToLast()) { |
| it.next(); |
| of = it.value(); |
| start = of + nCol * rl; |
| end = of + nCol * ru; |
| for(int offOut = 0, off = start; off < end; offOut++, off += nCol) |
| preAgg[offOut] += vals[off]; |
| } |
| } |
| } |
| |
| @Override |
| public void preAggregateSparse(SparseBlock sb, double[] preAgg, int rl, int ru) { |
| final AOffsetIterator it = _indexes.getOffsetIterator(); |
| if(rl == ru - 1) |
| preAggregateSparseSingleRow(sb, preAgg, rl, _indexes.getOffsetToLast(), it); |
| else |
| preAggregateSparseMultiRow(sb, preAgg, rl, ru, _indexes.getOffsetToLast(), it); |
| } |
| |
| private static void preAggregateSparseSingleRow(final SparseBlock sb, final double[] preAgg, final int r, |
| final int last, final AOffsetIterator it) { |
| if(sb.isEmpty(r)) |
| return; |
| |
| final int apos = sb.pos(r); |
| final int alen = sb.size(r) + apos; |
| final int[] aix = sb.indexes(r); |
| final double[] avals = sb.values(r); |
| |
| double ret = 0; |
| int i = it.value(); |
| int j = apos; |
| while(i < last && j < alen) { |
| final int idx = aix[j]; |
| if(idx == i) { |
| ret += avals[j++]; |
| i = it.next(); |
| } |
| else if(idx < i) |
| j++; |
| else |
| i = it.next(); |
| } |
| |
| while(j < alen && aix[j] < last) |
| j++; |
| |
| if(j < alen && aix[j] == last) |
| ret += avals[j]; |
| |
| preAgg[0] = ret; |
| } |
| |
| private static void preAggregateSparseMultiRow(final SparseBlock sb, final double[] preAgg, final int rl, |
| final int ru, final int last, final AOffsetIterator it) { |
| |
| int i = it.value(); |
| final int[] aOffs = new int[ru - rl]; |
| |
| // Initialize offsets for each row |
| for(int r = rl; r < ru; r++) |
| aOffs[r - rl] = sb.pos(r); |
| |
| while(i < last) { // while we are not done iterating |
| for(int r = rl; r < ru; r++) { |
| if(sb.isEmpty(r)) |
| continue; |
| final int off = r - rl; |
| int apos = aOffs[off]; // current offset |
| final int alen = sb.size(r) + sb.pos(r); |
| final int[] aix = sb.indexes(r); |
| while(apos < alen && aix[apos] < i)// increment all pointers to offset |
| apos++; |
| |
| if(apos < alen && aix[apos] == i) |
| preAgg[off] += sb.values(r)[apos]; |
| aOffs[off] = apos; |
| } |
| i = it.next(); |
| } |
| |
| // process final element |
| for(int r = rl; r < ru; r++) { |
| if(sb.isEmpty(r)) |
| continue; |
| final int off = r - rl; |
| int apos = aOffs[off]; |
| final int alen = sb.size(r) + sb.pos(r); |
| final int[] aix = sb.indexes(r); |
| while(apos < alen && aix[apos] < last) |
| apos++; |
| |
| if(apos < alen && aix[apos] == last) |
| preAgg[off] += sb.values(r)[apos]; |
| aOffs[off] = apos; |
| } |
| |
| } |
| |
| @Override |
| public long estimateInMemorySize() { |
| long size = super.estimateInMemorySize(); |
| size += _indexes.getInMemorySize(); |
| return size; |
| } |
| |
| @Override |
| public AColGroup scalarOperation(ScalarOperator op) { |
| final double val0 = op.executeScalar(0); |
| final boolean isSparseSafeOp = val0 == 0; |
| final ADictionary nDict = _dict.applyScalarOp(op); |
| if(isSparseSafeOp) |
| return create(_colIndexes, _numRows, nDict, _indexes, getCachedCounts()); |
| else { |
| final double[] defaultTuple = new double[_colIndexes.size()]; |
| Arrays.fill(defaultTuple, val0); |
| return ColGroupSDCSingle.create(_colIndexes, _numRows, nDict, defaultTuple, _indexes, getCachedCounts()); |
| } |
| } |
| |
| @Override |
| public AColGroup unaryOperation(UnaryOperator op) { |
| final double val0 = op.fn.execute(0); |
| final ADictionary nDict = _dict.applyUnaryOp(op); |
| if(val0 == 0) |
| return create(_colIndexes, _numRows, nDict, _indexes, getCachedCounts()); |
| else { |
| final double[] defaultTuple = new double[_colIndexes.size()]; |
| Arrays.fill(defaultTuple, val0); |
| return ColGroupSDCSingle.create(_colIndexes, _numRows, nDict, defaultTuple, _indexes, getCachedCounts()); |
| } |
| } |
| |
| @Override |
| protected double computeMxx(double c, Builtin builtin) { |
| c = builtin.execute(c, 0); |
| return _dict.aggregate(c, builtin); |
| } |
| |
| @Override |
| protected void computeColMxx(double[] c, Builtin builtin) { |
| for(int x = 0; x < _colIndexes.size(); x++) |
| c[_colIndexes.get(x)] = builtin.execute(c[_colIndexes.get(x)], 0); |
| |
| _dict.aggregateCols(c, builtin, _colIndexes); |
| } |
| |
| @Override |
| public boolean containsValue(double pattern) { |
| return (pattern == 0) || _dict.containsValue(pattern); |
| } |
| |
| @Override |
| public AColGroup binaryRowOpLeft(BinaryOperator op, double[] v, boolean isRowSafe) { |
| if(isRowSafe) { |
| ADictionary ret = _dict.binOpLeft(op, v, _colIndexes); |
| return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, ret, _indexes, getCachedCounts()); |
| } |
| else { |
| ADictionary newDict = _dict.binOpLeft(op, v, _colIndexes); |
| double[] defaultTuple = new double[_colIndexes.size()]; |
| for(int i = 0; i < _colIndexes.size(); i++) |
| defaultTuple[i] = op.fn.execute(v[_colIndexes.get(i)], 0); |
| return ColGroupSDCSingle.create(_colIndexes, _numRows, newDict, defaultTuple, _indexes, getCachedCounts()); |
| } |
| } |
| |
| @Override |
| public AColGroup binaryRowOpRight(BinaryOperator op, double[] v, boolean isRowSafe) { |
| if(isRowSafe) { |
| ADictionary ret = _dict.binOpRight(op, v, _colIndexes); |
| return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, ret, _indexes, getCachedCounts()); |
| } |
| else { |
| ADictionary newDict = _dict.binOpRight(op, v, _colIndexes); |
| double[] defaultTuple = new double[_colIndexes.size()]; |
| for(int i = 0; i < _colIndexes.size(); i++) |
| defaultTuple[i] = op.fn.execute(0, v[_colIndexes.get(i)]); |
| return ColGroupSDCSingle.create(_colIndexes, _numRows, newDict, defaultTuple, _indexes, getCachedCounts()); |
| } |
| } |
| |
| @Override |
| public void write(DataOutput out) throws IOException { |
| super.write(out); |
| _indexes.write(out); |
| } |
| |
| public static ColGroupSDCSingleZeros read(DataInput in, int nRows) throws IOException { |
| IColIndex cols = ColIndexFactory.read(in); |
| ADictionary dict = DictionaryFactory.read(in); |
| AOffset indexes = OffsetFactory.readIn(in); |
| return new ColGroupSDCSingleZeros(cols, nRows, dict, indexes, null); |
| } |
| |
| @Override |
| public long getExactSizeOnDisk() { |
| long ret = super.getExactSizeOnDisk(); |
| ret += _indexes.getExactSizeOnDisk(); |
| return ret; |
| } |
| |
| @Override |
| public boolean sameIndexStructure(AColGroupCompressed that) { |
| if(that instanceof ColGroupSDCSingleZeros) { |
| ColGroupSDCSingleZeros th = (ColGroupSDCSingleZeros) that; |
| return th._indexes == _indexes; |
| } |
| else if(that instanceof ColGroupSDCSingle) { |
| ColGroupSDCSingle th = (ColGroupSDCSingle) that; |
| return th._indexes == _indexes; |
| } |
| else |
| return false; |
| } |
| |
| @Override |
| protected AColGroup fixColIndexes(IColIndex newColIndex, int[] reordering) { |
| return ColGroupSDCSingleZeros.create(newColIndex, getNumRows(), _dict.reorder(reordering), _indexes, |
| getCachedCounts()); |
| } |
| |
| @Override |
| public void preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { |
| final AOffsetIterator itThis = _indexes.getOffsetIterator(); |
| final int nCol = that._colIndexes.size(); |
| final int finalOffThis = _indexes.getOffsetToLast(); |
| final double[] rV = ret.getValues(); |
| if(nCol == 1) |
| preAggregateThatDDCStructureSingleCol(that, rV, itThis, finalOffThis); |
| else |
| preAggregateThatDDCStructureMultiCol(that, rV, itThis, finalOffThis, nCol); |
| } |
| |
| private void preAggregateThatDDCStructureSingleCol(ColGroupDDC that, double[] rV, AOffsetIterator itThis, |
| int finalOffThis) { |
| double rv = 0; |
| final double[] tV = that._dict.getValues(); |
| while(true) { |
| final int v = itThis.value(); |
| rv += tV[that._data.getIndex(v)]; |
| if(v >= finalOffThis) |
| break; |
| itThis.next(); |
| } |
| |
| rV[0] += rv; |
| } |
| |
| private void preAggregateThatDDCStructureMultiCol(ColGroupDDC that, double[] rV, AOffsetIterator itThis, |
| int finalOffThis, int nCol) { |
| while(true) { |
| final int v = itThis.value(); |
| final int fr = that._data.getIndex(v); |
| that._dict.addToEntry(rV, fr, 0, nCol); |
| if(v >= finalOffThis) |
| break; |
| itThis.next(); |
| } |
| } |
| |
| @Override |
| public void preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { |
| final AIterator itThat = that._indexes.getIterator(); |
| final AOffsetIterator itThis = _indexes.getOffsetIterator(); |
| final int nCol = that._colIndexes.size(); |
| final int finalOffThis = _indexes.getOffsetToLast(); |
| final int finalOffThat = that._indexes.getOffsetToLast(); |
| final double[] rV = ret.getValues(); |
| if(nCol == 1) |
| preAggregateThatSDCZerosStructureSingleCol(that, rV, itThat, finalOffThat, itThis, finalOffThis); |
| else |
| preAggregateThatSDCZerosStructureMultiCol(that, rV, itThat, finalOffThat, itThis, finalOffThis, nCol); |
| } |
| |
| private void preAggregateThatSDCZerosStructureSingleCol(ColGroupSDCZeros that, double[] rV, AIterator itThat, |
| int finalOffThat, AOffsetIterator itThis, int finalOffThis) { |
| double rv = 0; |
| final double[] tV = that._dict.getValues(); |
| while(true) { |
| final int tv = itThat.value(); |
| final int v = itThis.value(); |
| if(tv == v) { |
| rv += tV[that._data.getIndex(itThat.getDataIndex())]; |
| if(tv >= finalOffThat || v >= finalOffThis) |
| break; |
| itThat.next(); |
| itThis.next(); |
| } |
| else if(tv < v) { |
| if(tv >= finalOffThat) |
| break; |
| itThat.next(); |
| } |
| else { |
| if(v >= finalOffThis) |
| break; |
| itThis.next(); |
| } |
| } |
| rV[0] += rv; |
| } |
| |
| private void preAggregateThatSDCZerosStructureMultiCol(ColGroupSDCZeros that, double[] rV, AIterator itThat, |
| int finalOffThat, AOffsetIterator itThis, int finalOffThis, int nCol) { |
| while(true) { |
| final int tv = itThat.value(); |
| final int v = itThis.value(); |
| if(tv == v) { |
| that._dict.addToEntry(rV, that._data.getIndex(itThat.getDataIndex()), 0, nCol); |
| if(tv >= finalOffThat || v >= finalOffThis) |
| break; |
| itThat.next(); |
| itThis.next(); |
| } |
| else if(tv < v) { |
| if(tv >= finalOffThat) |
| break; |
| itThat.next(); |
| } |
| else { |
| if(v >= finalOffThis) |
| break; |
| itThis.next(); |
| } |
| } |
| } |
| |
| @Override |
| public void preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { |
| final int nCol = that._colIndexes.size(); |
| final AOffsetIterator itThis = _indexes.getOffsetIterator(); |
| final AOffsetIterator itThat = that._indexes.getOffsetIterator(); |
| final int finalOffThis = _indexes.getOffsetToLast(); |
| final int finalOffThat = that._indexes.getOffsetToLast(); |
| int count = 0; |
| int tv = itThat.value(); |
| int v = itThis.value(); |
| while(tv < finalOffThat && v < finalOffThis) { |
| if(tv == v) { |
| count++; |
| tv = itThat.next(); |
| v = itThis.next(); |
| } |
| else if(tv < v) |
| tv = itThat.next(); |
| else |
| v = itThis.next(); |
| } |
| while(tv < finalOffThat && tv < v) |
| tv = itThat.next(); |
| while(v < finalOffThis && v < tv) |
| v = itThis.next(); |
| if(tv == v) |
| count++; |
| |
| that._dict.addToEntry(ret.getValues(), 0, 0, nCol, count); |
| |
| } |
| |
| @Override |
| protected void preAggregateThatRLEStructure(ColGroupRLE that, Dictionary ret) { |
| final int finalOff = _indexes.getOffsetToLast(); |
| final double[] v = ret.getValues(); |
| final int nv = that.getNumValues(); |
| final int nCol = that._colIndexes.size(); |
| for(int k = 0; k < nv; k++) { |
| final AOffsetIterator itThis = _indexes.getOffsetIterator(); |
| final int blen = that._ptr[k + 1]; |
| for(int apos = that._ptr[k], rs = 0, re = 0; apos < blen; apos += 2) { |
| rs = re + that._data[apos]; |
| re = rs + that._data[apos + 1]; |
| // if index is later than run continue |
| if(itThis.value() >= re || rs == re || rs > finalOff) |
| continue; |
| // while lower than run iterate through |
| while(itThis.value() < rs && itThis.value() != finalOff) |
| itThis.next(); |
| // process inside run |
| for(int rix = itThis.value(); rix < re; rix = itThis.value()) { // nice skip inside runs |
| that._dict.addToEntry(v, k, 0, nCol); |
| if(itThis.value() == finalOff) // break if final. |
| break; |
| itThis.next(); |
| } |
| } |
| } |
| } |
| |
| @Override |
| public int getPreAggregateSize() { |
| return 1; |
| } |
| |
| @Override |
| public AColGroup replace(double pattern, double replace) { |
| ADictionary replaced = _dict.replace(pattern, replace, _colIndexes.size()); |
| if(pattern == 0) { |
| double[] defaultTuple = new double[_colIndexes.size()]; |
| for(int i = 0; i < _colIndexes.size(); i++) |
| defaultTuple[i] = replace; |
| return ColGroupSDCSingle.create(_colIndexes, _numRows, replaced, defaultTuple, _indexes, getCachedCounts()); |
| } |
| return copyAndSet(replaced); |
| } |
| |
| @Override |
| protected void computeProduct(double[] c, int nRows) { |
| c[0] = 0; |
| } |
| |
| @Override |
| protected void computeColProduct(double[] c, int nRows) { |
| for(int i = 0; i < _colIndexes.size(); i++) |
| c[_colIndexes.get(i)] = 0; |
| } |
| |
| @Override |
| public double getCost(ComputationCostEstimator e, int nRows) { |
| final int nVals = getNumValues(); |
| final int nCols = getNumCols(); |
| final int nRowsScanned = getCounts()[0]; |
| return e.getCost(nRows, nRowsScanned, nCols, nVals, _dict.getSparsity()); |
| } |
| |
| @Override |
| protected int numRowsToMultiply() { |
| return getCounts()[0]; |
| } |
| |
| @Override |
| protected AColGroup allocateRightMultiplication(MatrixBlock right, IColIndex colIndexes, ADictionary preAgg) { |
| if(colIndexes != null && preAgg != null) |
| return create(colIndexes, _numRows, preAgg, _indexes, getCachedCounts()); |
| else |
| return null; |
| } |
| |
| @Override |
| public AColGroup sliceRows(int rl, int ru) { |
| OffsetSliceInfo off = _indexes.slice(rl, ru); |
| if(off.lIndex == -1) |
| return null; |
| return create(_colIndexes, ru - rl, _dict, off.offsetSlice, null); |
| } |
| |
| @Override |
| protected AColGroup copyAndSet(IColIndex colIndexes, ADictionary newDictionary) { |
| return create(colIndexes, _numRows, newDictionary, _indexes, getCachedCounts()); |
| } |
| |
| @Override |
| public AColGroup append(AColGroup g) { |
| return null; |
| } |
| |
| @Override |
| public AColGroup appendNInternal(AColGroup[] g) { |
| int sumRows = getNumRows(); |
| for(int i = 1; i < g.length; i++) { |
| if(!_colIndexes.equals(g[i]._colIndexes)) { |
| LOG.warn("Not same columns therefore not appending \n" + _colIndexes + "\n\n" + g[i]._colIndexes); |
| return null; |
| } |
| |
| if(!(g[i] instanceof ColGroupSDCSingleZeros)) { |
| LOG.warn("Not SDCFOR but " + g[i].getClass().getSimpleName()); |
| return null; |
| } |
| |
| final ColGroupSDCSingleZeros gc = (ColGroupSDCSingleZeros) g[i]; |
| if(!gc._dict.equals(_dict)) { |
| LOG.warn("Not same Dictionaries therefore not appending \n" + _dict + "\n\n" + gc._dict); |
| return null; |
| } |
| sumRows += gc.getNumRows(); |
| } |
| AOffset no = _indexes.appendN(Arrays.copyOf(g, g.length, AOffsetsGroup[].class), getNumRows()); |
| return create(_colIndexes, sumRows, _dict, no, null); |
| } |
| |
| @Override |
| public ICLAScheme getCompressionScheme() { |
| return null; |
| } |
| |
| @Override |
| public AColGroup recompress() { |
| return this; |
| } |
| |
| @Override |
| public CompressedSizeInfoColGroup getCompressionInfo(int nRow) { |
| throw new NotImplementedException(); |
| } |
| |
| @Override |
| public IEncode getEncoding() { |
| return EncodingFactory.create(new MapToZero(getCounts()[0]), _indexes, _numRows); |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| sb.append(super.toString()); |
| sb.append(String.format("\n%15s", "Indexes: ")); |
| sb.append(_indexes.toString()); |
| return sb.toString(); |
| } |
| } |