blob: 67df8210bade781698d09c46547848aaf7c4b051 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.compress.colgroup;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.sysds.runtime.functionobjects.Builtin;
import org.apache.sysds.runtime.functionobjects.KahanFunction;
import org.apache.sysds.runtime.functionobjects.ValueFunction;
import org.apache.sysds.runtime.instructions.cp.KahanObject;
import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
/**
* This dictionary class aims to encapsulate the storage and operations over unique floating point values of a column
* group.
*/
public abstract class ADictionary {

	/**
	 * Get all the values contained in the dictionary as a linearized double array.
	 *
	 * @return linearized double array
	 */
	public abstract double[] getValues();

	/**
	 * Get the specific value contained in the dictionary at the given index.
	 *
	 * @param i The index to extract the value from
	 * @return The value contained at the index
	 */
	public abstract double getValue(int i);

	/**
	 * Determines whether the content has a zero tuple, meaning that all values at a specific row are zero. This is
	 * useful information to find out if the dictionary is used in a dense context, and to improve some specific
	 * operations.
	 *
	 * NOTE(review): the value returned when no zero tuple exists is not stated here; presumably a negative index.
	 * Confirm against the concrete implementations.
	 *
	 * @param ncol The number of columns in the dictionary.
	 * @return The index at which the zero tuple is located.
	 */
	public abstract int hasZeroTuple(int ncol);

	/**
	 * Returns the memory usage of the dictionary.
	 *
	 * @return a long value in number of bytes for the dictionary.
	 */
	public abstract long getInMemorySize();

	/**
	 * Aggregate all the contained values. Useful in value-only computations where the operation iterates through
	 * all values contained in the dictionary.
	 *
	 * @param init The initial value; in cases such as max, this could be -infinity
	 * @param fn   The function to apply to values
	 * @return The aggregated value as a double.
	 */
	public abstract double aggregate(double init, Builtin fn);

	/**
	 * Returns the count of values contained in the dictionary.
	 *
	 * @return an integer count of values.
	 */
	public abstract int getValuesLength();

	/**
	 * Applies the scalar operation on the dictionary. Note that this operation modifies the underlying data, and
	 * normally requires a copy of the original dictionary to preserve old objects.
	 *
	 * @param op The operator to apply to the dictionary values.
	 * @return this dictionary with modified values.
	 */
	public abstract ADictionary apply(ScalarOperator op);

	/**
	 * Applies the scalar operation on the dictionary. The returned dictionary should contain a new instance of the
	 * underlying data. Therefore it will not modify the previous object.
	 *
	 * @param op      The operator to apply to the dictionary values.
	 * @param newVal  The value to append to the dictionary.
	 * @param numCols The number of columns stored in the dictionary.
	 * @return Another dictionary with modified values.
	 */
	public abstract ADictionary applyScalarOp(ScalarOperator op, double newVal, int numCols);

	/**
	 * Applies a binary row operation on the dictionary.
	 *
	 * NOTE(review): the exact contract is defined by the implementations and is not visible here. Presumably
	 * {@code fn} is applied between each dictionary value and the entry of {@code v} selected through
	 * {@code colIndexes}; confirm before relying on this.
	 *
	 * @param fn         The value function to apply.
	 * @param v          The row vector operand.
	 * @param sparseSafe Whether the operation is sparse safe (presumably: zero tuples stay zero; confirm).
	 * @param colIndexes The column indexes of the column group this dictionary belongs to.
	 * @return The resulting dictionary.
	 */
	public abstract ADictionary applyBinaryRowOp(ValueFunction fn, double[] v, boolean sparseSafe, int[] colIndexes);

	/**
	 * Returns a deep clone of the dictionary.
	 *
	 * @return a deep clone of this dictionary.
	 */
	@Override
	public abstract ADictionary clone();

	/**
	 * Aggregates the columns into the target double array provided.
	 *
	 * The dictionary values are read tuple by tuple through {@link #getValue(int)}, and each value is folded into
	 * the target cell of its column using {@code fn}. If {@code colIndexes} is empty there is nothing to aggregate
	 * and the target is left untouched.
	 *
	 * @param c          The target double array, this contains the full number of columns, therefore the colIndexes
	 *                   for this specific dictionary is needed.
	 * @param fn         The function to apply to individual columns
	 * @param colIndexes The mapping to the target columns from the individual columns
	 */
	public void aggregateCols(double[] c, Builtin fn, int[] colIndexes) {
		final int ncol = colIndexes.length;
		// Without columns there is nothing to fold; returning here also avoids the integer division by zero below.
		if(ncol == 0)
			return;

		final int nTuples = getValuesLength() / ncol;
		for(int k = 0; k < nTuples; k++) {
			// Offset of the first value of tuple k in the linearized value layout.
			final int valOff = k * ncol;
			for(int j = 0; j < ncol; j++) {
				final int target = colIndexes[j];
				c[target] = fn.execute(c[target], getValue(valOff + j));
			}
		}
	}

	/**
	 * The read function to instantiate the dictionary. Delegates to {@code QDictionary.read} when {@code lossy} is
	 * true and to {@code Dictionary.read} otherwise.
	 *
	 * @param in    The data input source to read the stored dictionary from
	 * @param lossy Boolean specifying if the dictionary stored was lossy.
	 * @return The concrete dictionary.
	 * @throws IOException if the reading source throws it.
	 */
	public static ADictionary read(DataInput in, boolean lossy) throws IOException {
		return lossy ? QDictionary.read(in) : Dictionary.read(in);
	}

	/**
	 * Write the dictionary to a DataOutput.
	 *
	 * @param out the output sink to write the dictionary to.
	 * @throws IOException if the sink fails.
	 */
	public abstract void write(DataOutput out) throws IOException;

	/**
	 * Calculate the space consumption if the dictionary is stored on disk.
	 *
	 * @return the long count of bytes to store the dictionary.
	 */
	public abstract long getExactSizeOnDisk();

	/**
	 * Get the number of values given that the column group has n columns.
	 *
	 * @param ncol The number of columns in the ColumnGroup.
	 * @return the number of value tuples contained in the dictionary.
	 */
	public abstract int getNumberOfValues(int ncol);

	/**
	 * Method used as a pre-aggregate of each tuple in the dictionary, to single double values.
	 *
	 * Note if the number of columns is one the actual dictionary values are simply returned.
	 *
	 * @param kplus     The function to apply to each value in the rows
	 * @param kbuff     The buffer to use to aggregate the value.
	 * @param nrColumns The number of columns in the ColGroup to know how to get the values from the dictionary.
	 * @return a double array containing the row sums from this dictionary.
	 */
	protected abstract double[] sumAllRowsToDouble(KahanFunction kplus, KahanObject kbuff, int nrColumns);

	/**
	 * Sum the values at a specific row.
	 *
	 * @param k         The row index to sum
	 * @param kplus     The operator to use
	 * @param kbuff     The buffer to aggregate inside.
	 * @param nrColumns The number of columns
	 * @return The sum of the row.
	 */
	protected abstract double sumRow(int k, KahanFunction kplus, KahanObject kbuff, int nrColumns);

	/**
	 * Column-wise sum of the dictionary into the target array.
	 *
	 * NOTE(review): presumably each tuple is weighted by its entry in {@code counts} and the per-column results are
	 * added into {@code c} at the positions given by {@code colIndexes}; confirm against the implementations.
	 *
	 * @param c          The target double array to accumulate into.
	 * @param counts     The counts associated with the dictionary tuples.
	 * @param colIndexes The mapping to the target columns from the individual columns.
	 * @param kplus      The summation function to use.
	 */
	protected abstract void colSum(double[] c, int[] counts, int[] colIndexes, KahanFunction kplus);

	/**
	 * Sum of the dictionary contents.
	 *
	 * NOTE(review): presumably each tuple is weighted by its entry in {@code counts}; confirm against the
	 * implementations.
	 *
	 * @param counts The counts associated with the dictionary tuples.
	 * @param ncol   The number of columns in the dictionary.
	 * @param kplus  The summation function to use.
	 * @return The resulting sum.
	 */
	protected abstract double sum(int[] counts, int ncol, KahanFunction kplus);

	/**
	 * Builds a string representation of the dictionary using the provided StringBuilder.
	 *
	 * NOTE(review): despite its plural name, {@code colIndexes} is a single int; presumably the number of columns
	 * per tuple. Confirm against the implementations.
	 *
	 * @param sb         The StringBuilder to use.
	 * @param colIndexes See the note above.
	 * @return A StringBuilder holding the representation (presumably the same instance as {@code sb}; confirm).
	 */
	public abstract StringBuilder getString(StringBuilder sb, int colIndexes);
}