blob: 872e512adf262f113b7ab33adfa57d9a09ac07f7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.compress.estim.encoding;
import java.util.Map;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.runtime.compress.CompressionSettings;
import org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
import org.apache.sysds.runtime.compress.estim.EstimationFactors;
import org.apache.sysds.runtime.compress.utils.IntArrayList;
/**
* A Encoding that contain a default value that is not encoded and every other value is encoded in the map. The logic is
* similar to the SDC column group.
*/
public class SparseEncoding implements IEncode {
/** A map to the distinct values contained */
protected final AMapToData map;
/** A Offset index structure to indicate space of zero values */
protected final AOffset off;
/** Total number of rows encoded */
protected final int nRows;
protected SparseEncoding(AMapToData map, AOffset off, int nRows) {
this.map = map;
this.off = off;
this.nRows = nRows;
}
@Override
public IEncode combine(IEncode e) {
if(e instanceof EmptyEncoding || e instanceof ConstEncoding)
return this;
else if(e instanceof SparseEncoding) {
SparseEncoding es = (SparseEncoding) e;
if(es.off == off && es.map == map)
return this;
return combineSparse(es);
}
else
return e.combine(this);
}
@Override
public Pair<IEncode, Map<Integer, Integer>> combineWithMap(IEncode e) {
if(e instanceof EmptyEncoding || e instanceof ConstEncoding)
return new ImmutablePair<>(this, null);
else if(e instanceof SparseEncoding) {
SparseEncoding es = (SparseEncoding) e;
if(es.off == off && es.map == map)
return new ImmutablePair<>(this, null);
return new ImmutablePair<>(combineSparseNoResize(es), null);
}
else
throw new DMLCompressionException("Not allowed other to be dense");
}
protected IEncode combineSparse(SparseEncoding e) {
final int maxUnique = e.getUnique() * getUnique();
final int[] d = new int[maxUnique - 1];
final int fl = off.getOffsetToLast();
final int fr = e.off.getOffsetToLast();
final AIterator itl = off.getIterator();
final AIterator itr = e.off.getIterator();
final int nVl = getUnique();
final int nVr = e.getUnique();
final int sl = map.size();
final int sr = e.map.size();
if(sl + sr > nRows / 2)
return combineSparseToDense(map, e.map, itl, itr, fl, fr, nVl, nVr, d, nRows, maxUnique);
final IntArrayList retOff = new IntArrayList(Math.max(sr, sl));
final IntArrayList tmpVals = new IntArrayList(Math.max(sr, sl));
final int unique = combineSparse(map, e.map, itl, itr, retOff, tmpVals, fl, fr, nVl, nVr, d);
if(retOff.size() < nRows / 4) {
final AOffset o = OffsetFactory.createOffset(retOff);
final AMapToData retMap = MapToFactory.create(tmpVals.size(), tmpVals.extractValues(), unique - 1);
return new SparseEncoding(retMap, o, nRows);
}
else {
// there will always be a zero therefore unique is not subtracted one.
// if there is no zeros this will not be valid and crash.
final AMapToData retMap = MapToFactory.create(nRows, unique);
for(int i = 0; i < retOff.size(); i++)
retMap.set(retOff.get(i), tmpVals.get(i) + 1);
return new DenseEncoding(retMap);
}
}
private IEncode combineSparseNoResize(SparseEncoding e) {
// for now just use the dense... and lets continue.
// TODO add sparse combine with sparse output.
return combineSparseNoResizeDense(e);
}
private IEncode combineSparseNoResizeDense(SparseEncoding e) {
final int fl = off.getOffsetToLast();
final int fr = e.off.getOffsetToLast();
final AIterator itl = off.getIterator();
final AIterator itr = e.off.getIterator();
final int nVl = getUnique();
final int nVr = e.getUnique();
final AMapToData retMap = MapToFactory.create(nRows, (nVl + 1) * (nVr + 1));
int il = itl.value();
// parse through one side set all values into the dense.
while(il < fl) {
retMap.set(il, map.getIndex(itl.getDataIndex()) + 1);
il = itl.next();
}
retMap.set(fl, map.getIndex(itl.getDataIndex()) + 1);
int ir = itr.value();
// parse through other side set all values with offset based on what already is there.
while(ir < fr) {
final int vl = retMap.getIndex(ir); // probably 0
final int vr = e.map.getIndex(itr.getDataIndex()) + 1;
retMap.set(ir, vl + vr * nVl);
ir = itr.next();
}
retMap.set(fr, retMap.getIndex(fr) + (e.map.getIndex(itr.getDataIndex()) + 1) * nVl);
return new DenseEncoding(retMap);
}
private static int combineSparse(AMapToData lMap, AMapToData rMap, AIterator itl, AIterator itr,
final IntArrayList retOff, final IntArrayList tmpVals, final int fl, final int fr, final int nVl, final int nVr,
final int[] d) {
final int defR = (nVr - 1) * nVl;
final int defL = nVl - 1;
int newUID = 1;
int il = itl.value();
int ir = itr.value();
while(il < fl && ir < fr) {
if(il == ir) {// Both sides have a value same row.
final int nv = lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
il = itl.next();
ir = itr.next();
}
else if(il < ir) { // left side have a value before right
final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
il = itl.next();
}
else {// right side have a value before left
final int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
ir = itr.next();
}
}
newUID = combineSparseTail(lMap, rMap, itl, itr, retOff, tmpVals, fl, fr, nVl, nVr, d, newUID);
return newUID;
}
private static int combineSparseTail(AMapToData lMap, AMapToData rMap, AIterator itl, AIterator itr,
final IntArrayList retOff, final IntArrayList tmpVals, final int fl, final int fr, final int nVl, final int nVr,
final int[] d, int newUID) {
final int defR = (nVr - 1) * nVl;
final int defL = nVl - 1;
int il = itl.value();
int ir = itr.value();
if(il == fl && ir == fr) {
if(fl == fr) {
final int nv = lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
return addVal(nv, il, d, newUID, tmpVals, retOff);
}
else if(fl < fr) {// fl is first
int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, fr, d, newUID, tmpVals, retOff);
}
else {// fl is last
int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, fr, d, newUID, tmpVals, retOff);
nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
}
}
else if(il < fl) {
if(fl < fr) {
while(il < fl) {
final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
il = itl.next();
}
int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, fr, d, newUID, tmpVals, retOff);
return newUID;
}
else {
while(il < fr) {
final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
il = itl.next();
}
if(fl == fr) {
final int nv = lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
return addVal(nv, il, d, newUID, tmpVals, retOff);
}
else if(il == fr) {
final int nv = lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
il = itl.next();
}
else {
final int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, fr, d, newUID, tmpVals, retOff);
}
while(il < fl) {
final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
il = itl.next();
}
final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
}
}
else { // if(ir < fr)
if(fr < fl) {
while(ir < fr) {
final int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
ir = itr.next();
}
int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, fl, d, newUID, tmpVals, retOff);
return newUID;
}
else {
while(ir < fl) {
final int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
ir = itr.next();
}
if(fr == fl) {
final int nv = lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
return addVal(nv, ir, d, newUID, tmpVals, retOff);
}
else if(ir == fl) {
final int nv = lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
ir = itr.next();
}
else {
final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
newUID = addVal(nv, fl, d, newUID, tmpVals, retOff);
}
while(ir < fr) {
final int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
ir = itr.next();
}
final int nv = rMap.getIndex(itr.getDataIndex()) * nVl + defL;
newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
}
}
return newUID;
}
private static int addVal(int nv, int offset, int[] d, int newUID, IntArrayList tmpVals, IntArrayList offsets) {
int mapV = d[nv];
if(mapV == 0)
mapV = d[nv] = newUID++;
tmpVals.appendValue(mapV - 1);
offsets.appendValue(offset);
return newUID;
}
private static DenseEncoding combineSparseToDense(AMapToData lMap, AMapToData rMap, AIterator itl, AIterator itr,
int fl, int fr, int nVl, int nVr, int[] d, int nRows, int maxUnique) {
final AMapToData retMap = MapToFactory.create(nRows, (nVl + 1) * (nVr + 1));
int il = itl.value();
// parse through one side set all values into the dense.
while(il < fl) {
retMap.set(il, lMap.getIndex(itl.getDataIndex()) + 1);
il = itl.next();
}
retMap.set(fl, lMap.getIndex(itl.getDataIndex()) + 1);
int ir = itr.value();
// parse through other side set all values with offset based on what already is there.
while(ir < fr) {
final int vl = retMap.getIndex(ir); // probably 0
final int vr = rMap.getIndex(itr.getDataIndex()) + 1;
retMap.set(ir, vl + vr * nVl);
ir = itr.next();
}
retMap.set(fr, retMap.getIndex(fr) + (rMap.getIndex(itr.getDataIndex()) + 1) * nVl);
// parse through entire output reducing number of unique.
final AMapToData tmpMap = MapToFactory.create(maxUnique, maxUnique + 1);
int newUID = 1;
for(int r = 0; r < retMap.size(); r++) {
int nv = retMap.getIndex(r);
int mv = tmpMap.getIndex(nv);
if(mv == 0)
mv = tmpMap.setAndGet(nv, newUID++);
retMap.set(r, mv - 1);
}
// parse though other side and use all ret to set correctly.
retMap.setUnique(newUID - 1);
return new DenseEncoding(retMap);
}
@Override
public int getUnique() {
return map.getUnique() + 1;
}
@Override
public EstimationFactors extractFacts(int nRows, double tupleSparsity, double matrixSparsity,
CompressionSettings cs) {
final int largestOffs = nRows - map.size(); // known largest off is zero tuples
tupleSparsity = Math.min((double) map.size() / (double) nRows, tupleSparsity);
final int[] counts = map.getCounts(new int[map.getUnique()]);
if(cs.isRLEAllowed())
return new EstimationFactors(map.getUnique(), map.size(), largestOffs, counts, 0, nRows, map.countRuns(off),
false, true, matrixSparsity, tupleSparsity);
else
return new EstimationFactors(map.getUnique(), map.size(), largestOffs, counts, 0, nRows, false, true,
matrixSparsity, tupleSparsity);
}
@Override
public boolean isDense() {
return false;
}
public AOffset getOffsets() {
return off;
}
public AMapToData getMap() {
return map;
}
public int getNumRows() {
return nRows;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(this.getClass().getSimpleName());
sb.append("\n");
sb.append("mapping: ");
sb.append(map);
sb.append("\n");
sb.append("offsets: ");
sb.append(off);
return sb.toString();
}
}