blob: c6a2e5370a7b9bb9cd7be36a814f2b2cdb0b1c7d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.compress.colgroup;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import org.apache.sysds.runtime.functionobjects.Builtin;
import org.apache.sysds.runtime.functionobjects.KahanFunction;
import org.apache.sysds.runtime.functionobjects.KahanPlus;
import org.apache.sysds.runtime.instructions.cp.KahanObject;
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
import org.apache.sysds.utils.MemoryEstimates;
/**
 * This dictionary class aims to encapsulate the storage and operations over unique floating point values of a column
 * group. The primary reason for its introduction was to provide an entry point for specialization such as shared
 * dictionaries, which require additional information.
 *
 * <p>
 * The values of all tuples are kept in a single linearized, row-major array. Several methods take the number of
 * columns per tuple as an argument and derive the number of tuples as {@code _values.length / ncol}.
 */
public class Dictionary extends IDictionary {

	// Linearized row major.
	// v11 v12
	// v21 v22
	// ||
	// \/
	// v11 v12 v21 v22
	protected final double[] _values;

	/**
	 * Creates a dictionary backed by the given array. The array is stored by reference, not copied.
	 *
	 * @param values the linearized row-major dictionary values
	 */
	public Dictionary(double[] values) {
		_values = values;
	}

	/**
	 * Returns the backing value array itself (no copy is made), so modifications by the caller are visible in this
	 * dictionary.
	 *
	 * @return the internal linearized value array
	 */
	public double[] getValues() {
		return _values;
	}

	/**
	 * Returns the value at the given position of the linearized array.
	 *
	 * @param i index into the linearized value array
	 * @return the value stored at index {@code i}
	 */
	public double getValue(int i) {
		return _values[i];
	}

	/**
	 * Estimates the in-memory size of this dictionary, based on the length of its value array.
	 *
	 * @return the estimated size as computed by {@link #getInMemorySize(int)}
	 */
	public long getInMemorySize() {
		// object + values array
		return getInMemorySize(_values.length);
	}

	/**
	 * Estimates the in-memory size of a dictionary holding the given number of values.
	 *
	 * @param valuesCount the number of double values in the dictionary
	 * @return a constant of 16 plus the cost reported by {@code MemoryEstimates.doubleArrayCost}
	 */
	public static long getInMemorySize(int valuesCount) {
		// object + values array
		return 16 + MemoryEstimates.doubleArrayCost(valuesCount);
	}

	/**
	 * Searches for the first tuple whose entries all compare equal to zero.
	 *
	 * @param ncol the number of columns (values) per tuple
	 * @return the index of the first all-zero tuple, or -1 if there is none
	 */
	public int hasZeroTuple(int ncol) {
		int len = _values.length / ncol;
		for(int i = 0, off = 0; i < len; i++, off += ncol) {
			boolean allZeros = true;
			// stop scanning this tuple as soon as one entry is not equal to zero
			for(int j = 0; j < ncol && allZeros; j++)
				allZeros = (_values[off + j] == 0);
			if(allZeros)
				return i;
		}
		return -1;
	}

	/**
	 * Folds all dictionary values into a single value using the given builtin function.
	 *
	 * @param init the initial value of the aggregate
	 * @param fn   the function applied as {@code ret = fn.execute(ret, value)} for each value in storage order
	 * @return the aggregate over all values, or {@code init} if the dictionary is empty
	 */
	public double aggregate(double init, Builtin fn) {
		// full aggregate can disregard tuple boundaries
		double ret = init;
		for(double v : _values)
			ret = fn.execute(ret, v);
		return ret;
	}

	/**
	 * Applies the scalar operator to every value of this dictionary. The dictionary is modified in place; no new
	 * dictionary is allocated.
	 *
	 * @param op the scalar operator to execute on each value
	 * @return this dictionary, to allow call chaining
	 */
	public IDictionary apply(ScalarOperator op) {
		// in-place modification of the dictionary
		final int len = _values.length;
		for(int i = 0; i < len; i++)
			_values[i] = op.executeScalar(_values[i]);
		return this; // fluent API
	}

	/**
	 * Creates a new dictionary backed by a copy of the value array.
	 *
	 * @return a dictionary with its own copy of the values
	 */
	@Override
	public IDictionary clone() {
		return new Dictionary(_values.clone());
	}

	/**
	 * Returns the length of the linearized value array (number of tuples times number of columns).
	 *
	 * @return the total number of stored double values
	 */
	@Override
	public int getValuesLength() {
		return _values.length;
	}

	/**
	 * Reads a dictionary in the format produced by {@link #write(DataOutput)}: an int count followed by that many
	 * doubles.
	 *
	 * @param in the input to read from
	 * @return a new dictionary holding the values read
	 * @throws IOException if reading from the input fails
	 */
	public static Dictionary read(DataInput in) throws IOException {
		int numVals = in.readInt();
		// read distinct values
		double[] values = new double[numVals];
		for(int i = 0; i < numVals; i++)
			values[i] = in.readDouble();
		return new Dictionary(values);
	}

	/**
	 * Writes the number of values as an int, followed by each value as a double.
	 *
	 * @param out the output to write to
	 * @throws IOException if writing to the output fails
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		final int len = _values.length;
		out.writeInt(len);
		for(int i = 0; i < len; i++)
			out.writeDouble(_values[i]);
	}

	/**
	 * Returns the number of bytes emitted by {@link #write(DataOutput)}: 4 bytes for the int length plus 8 bytes per
	 * double value.
	 *
	 * @return the serialized size in bytes
	 */
	@Override
	public long getExactSizeOnDisk() {
		// multiply in long arithmetic; 8 * length overflows int for arrays with more than ~268M entries
		return 4 + 8L * _values.length;
	}

	/**
	 * Creates a new dictionary containing all values of the given dictionary followed by {@code numCols} additional
	 * entries, which are zero because the copy is padded with the default double value.
	 *
	 * @param OldDictionary the dictionary whose values are copied
	 * @param numCols       the number of zero entries to append (one tuple worth of columns)
	 * @return a new dictionary with the appended zero entries
	 */
	public static Dictionary materializeZeroValueFull(Dictionary OldDictionary, int numCols) {
		return new Dictionary(Arrays.copyOf(OldDictionary._values, OldDictionary._values.length + numCols));
	}

	/**
	 * Returns the number of tuples stored, given the number of columns per tuple.
	 *
	 * @param ncol the number of columns (values) per tuple
	 * @return the value array length divided by {@code ncol} (integer division)
	 */
	public int getNumberOfValues(int ncol) {
		return _values.length / ncol;
	}

	/**
	 * Pre-aggregates each tuple into a single value using the given Kahan function.
	 *
	 * <p>
	 * Note: for a single column combined with a {@code KahanPlus} function, the internal value array is returned
	 * directly without copying, independent of {@code allocNew}; callers must not modify the result in that case
	 * unless they intend to modify the dictionary.
	 *
	 * @param kplus     the Kahan function used to aggregate the values of a tuple
	 * @param kbuff     the buffer object used (and overwritten) during aggregation
	 * @param nrColumns the number of columns (values) per tuple
	 * @param allocNew  if true a fresh array is allocated for the result; otherwise the array is obtained from
	 *                  {@code ColGroupValue.allocDVector} (presumably a reusable buffer; verify against that method)
	 * @return an array with one aggregated value per tuple
	 */
	@Override
	protected double[] sumAllRowsToDouble(KahanFunction kplus, KahanObject kbuff, int nrColumns, boolean allocNew) {
		if(nrColumns == 1 && kplus instanceof KahanPlus)
			return getValues(); // shallow copy of values
		// pre-aggregate value tuple
		final int numVals = _values.length / nrColumns;
		double[] ret = allocNew ? new double[numVals] : ColGroupValue.allocDVector(numVals, false);
		for(int k = 0; k < numVals; k++) {
			ret[k] = sumRow(k, kplus, kbuff, nrColumns);
		}
		return ret;
	}

	/**
	 * Aggregates the values of a single tuple using the given Kahan function.
	 *
	 * @param k         the index of the tuple to aggregate
	 * @param kplus     the Kahan function executed for each value of the tuple
	 * @param kbuff     the buffer object; it is reset to (0, 0) before aggregation and holds the result afterwards
	 * @param nrColumns the number of columns (values) per tuple
	 * @return the {@code _sum} field of the buffer after all values of the tuple were processed
	 */
	@Override
	protected double sumRow(int k, KahanFunction kplus, KahanObject kbuff, int nrColumns) {
		kbuff.set(0, 0);
		int valOff = k * nrColumns;
		for(int i = 0; i < nrColumns; i++)
			kplus.execute2(kbuff, _values[valOff + i]);
		return kbuff._sum;
	}
}