src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupQuan.java - systemds - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.sysds.runtime.compress.colgroup;

 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.DoubleSummaryStatistics;
 import java.util.Iterator;

 import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.DMLCompressionException;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.DMLScriptException;
 import org.apache.sysds.runtime.compress.UncompressedBitmap;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
 import org.apache.sysds.runtime.functionobjects.KahanPlus;
 import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
 import org.apache.sysds.runtime.functionobjects.Multiply;
 import org.apache.sysds.runtime.functionobjects.ReduceAll;
 import org.apache.sysds.runtime.functionobjects.ReduceCol;
 import org.apache.sysds.runtime.functionobjects.ReduceRow;
 import org.apache.sysds.runtime.matrix.data.IJV;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;

 /**
  * Column group that stores its cells as lossy, signed 8-bit quantized values. Each cell is one byte in
  * {@code _values} and decodes to {@code _values[ix] * _scale}.
  *
  * Cells are addressed column by column: the cell at (row, colIx) is read from
  * {@code colIx * _numRows + row}. This is the layout the column aggregates rely on when they map a
  * position {@code i} to its column with {@code i / _numRows}.
  */
 public class ColGroupQuan extends ColGroup {

 	private static final long serialVersionUID = -9157476271360522008L;

 	/** Factor that maps a stored byte back to its approximate original value. */
 	protected double _scale;
 	/** Quantized cells, one byte per cell. */
 	protected byte[] _values;

 	/** Creates an empty group; the fields are filled in later by {@link #readFields(DataInput)}. */
 	protected ColGroupQuan() {
 		super();
 	}

 	/**
 	 * Quantizes the distinct values of a bitmap into bytes.
 	 *
 	 * The scale is chosen so that the largest absolute value maps to {@code Byte.MAX_VALUE}; every value is
 	 * then divided by the scale and rounded to the nearest byte.
 	 *
 	 * NOTE(review): each bitmap offset is used as-is as the position in {@code _values}; confirm how
 	 * multi-column bitmaps are expected to map onto this array.
 	 *
 	 * @param colIndexes column indexes covered by this group
 	 * @param numRows    number of rows in the group
 	 * @param ubm        bitmap providing the distinct values and, per value, the offsets where it occurs
 	 * @throws DMLCompressionException if the largest absolute value is infinite
 	 * @throws DMLRuntimeException     if scaling a value yields NaN or infinity
 	 */
 	protected ColGroupQuan(int[] colIndexes, int numRows, UncompressedBitmap ubm) {
 		super(colIndexes, numRows);
 		_values = new byte[ubm.getNumColumns() * numRows];

 		double[] valuesFullPrecision = ubm.getValues();
 		DoubleSummaryStatistics stat = Arrays.stream(valuesFullPrecision).summaryStatistics();
 		double max = Math.max(Math.abs(stat.getMax()), Math.abs(stat.getMin()));
 		if(Double.isInfinite(max)) {
 			throw new DMLCompressionException("Invalid ColGroupQuan, can't quantize Infinite value.");
 		}
 		else if(max == 0) {
 			_scale = 1;
 			LOG.error("ColGroup! column with only 0 values good excuse to make new ColGroup");
 		}
 		else {
 			_scale = max / (double) (Byte.MAX_VALUE);
 		}
 		for (int i = 0; i < valuesFullPrecision.length; i++) {
 			// extractValues() is paired with size() because only the first size() entries are consumed.
 			int[] runs = ubm.getOffsetsList(i).extractValues();
 			int numRuns = ubm.getOffsetsList(i).size();
 			double scaledVal = valuesFullPrecision[i] / _scale;
 			if(Double.isNaN(scaledVal) || Double.isInfinite(scaledVal)) {
 				throw new DMLRuntimeException("Something went wrong in scaling values");
 			}
 			// Round to nearest instead of truncating towards zero: halves the worst-case error and keeps
 			// the maximum at 127 even when the division lands slightly below it.
 			byte scaledValQuan = (byte) Math.round(scaledVal);
 			for (int j = 0; j < numRuns; j++) {
 				_values[runs[j]] = scaledValQuan;
 			}
 		}
 	}

 	@Override
 	public boolean getIfCountsType() {
 		return false;
 	}

 	/**
 	 * Wraps already quantized data; the number of rows is derived as {@code values.length / colIndexes.length}.
 	 *
 	 * @param colIndexes column indexes covered by this group
 	 * @param scale      factor that decodes a byte
 	 * @param values     quantized cells (not copied)
 	 */
 	private ColGroupQuan(int[] colIndexes, double scale, byte[] values) {
 		super(colIndexes, values.length / colIndexes.length);
 		this._scale = scale;
 		this._values = values;
 	}

 	/** Position in {@code _values} of the cell at the given row and group-local column position. */
 	private int index(int row, int colIx) {
 		return colIx * _numRows + row;
 	}

 	@Override
 	public CompressionType getCompType() {
 		return CompressionType.QUAN;
 	}

 	@Override
 	protected ColGroupType getColGroupType() {
 		return ColGroupType.QUAN8S;
 	}

 	/**
 	 * Writes the decoded cells of rows {@code [rl, ru)} into {@code target}, using this group's column
 	 * indexes as target columns.
 	 */
 	@Override
 	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
 		if(_values == null || _values.length == 0) {
 			return;
 		}
 		for (int row = rl; row < ru; row++) {
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				double val = _values[index(row, colIx)] * _scale;
 				target.quickSetValue(row, _colIndexes[colIx], val);
 			}
 		}
 	}

 	/**
 	 * Writes the decoded cells of all rows into {@code target}, using this group's column indexes as target
 	 * columns.
 	 *
 	 * NOTE(review): {@code colIndexTargets} is not consulted here; confirm whether callers expect the
 	 * target column to be looked up through it.
 	 */
 	@Override
 	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
 		if(_values == null || _values.length == 0) {
 			return;
 		}
 		for (int row = 0; row < _numRows; row++) {
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				double val = _values[index(row, colIx)] * _scale;
 				target.quickSetValue(row, _colIndexes[colIx], val);
 			}
 		}
 	}

 	/**
 	 * Decodes the single column at group-local position {@code colpos} into the first {@code _numRows}
 	 * entries of the target's dense values and sets the target's non-zero count accordingly.
 	 */
 	@Override
 	public void decompressToBlock(MatrixBlock target, int colpos) {
 		if(_values == null || _values.length == 0)
 			return;

 		double[] c = target.getDenseBlockValues();
 		int nnz = 0;

 		for (int row = 0; row < _numRows; row++) {
 			byte qVal = _values[index(row, colpos)];
 			if(qVal != 0) {
 				nnz++;
 			}
 			c[row] = qVal * _scale;
 		}
 		target.setNonZeros(nnz);
 	}

 	/** Serializes row count, column count, column indexes, the quantized bytes and finally the scale. */
 	@Override
 	public void write(DataOutput out) throws IOException {
 		out.writeInt(_numRows);
 		out.writeInt(_colIndexes.length);

 		for (int colIndex : _colIndexes)
 			out.writeInt(colIndex);

 		out.write(_values);

 		out.writeDouble(_scale);
 	}

 	/** Reads the fields in the order {@link #write(DataOutput)} emits them. */
 	@Override
 	public void readFields(DataInput in) throws IOException {
 		_numRows = in.readInt();
 		int numCols = in.readInt();

 		_colIndexes = new int[numCols];
 		for (int i = 0; i < _colIndexes.length; i++)
 			_colIndexes[i] = in.readInt();

 		_values = new byte[_numRows * numCols];
 		in.readFully(_values);

 		_scale = in.readDouble();
 	}

 	/** Number of bytes {@link #write(DataOutput)} produces for this group. */
 	@Override
 	public long getExactSizeOnDisk() {
 		long ret = 8; // header: row count and column count
 		ret += 4L * _colIndexes.length;
 		ret += _values.length;
 		ret += 8; // scale, written as a double
 		return ret;
 	}

 	/**
 	 * Returns the decoded value at row {@code r} and column {@code c}.
 	 *
 	 * @throws DMLRuntimeException if column {@code c} is not part of this group
 	 */
 	@Override
 	public double get(int r, int c) {
 		int colIx = Arrays.binarySearch(_colIndexes, c);
 		if(colIx < 0) {
 			throw new DMLRuntimeException("Column index " + c + " is not part of this ColGroupQuan");
 		}
 		return _values[index(r, colIx)] * _scale;
 	}

 	/**
 	 * Adds, for every row in {@code [rl, ru)}, the product of the decoded row with the matching entries of
 	 * {@code vector} onto {@code result}.
 	 */
 	@Override
 	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) {
 		double[] b = ColGroupConverter.getDenseVector(vector);
 		double[] c = result.getDenseBlockValues();

 		// prepare reduced rhs w/ relevant values
 		double[] sb = new double[_colIndexes.length];
 		for (int j = 0; j < _colIndexes.length; j++) {
 			sb[j] = b[_colIndexes[j]];
 		}

 		for (int row = rl; row < ru; row++) {
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				c[row] += (_values[index(row, colIx)] * _scale) * sb[colIx];
 			}
 		}
 	}

 	/**
 	 * Adds the product of the row vector with this group onto {@code result}: entry {@code row} of the
 	 * vector is multiplied with the decoded cell at (row, col) and accumulated in the result entry of that
 	 * column.
 	 */
 	@Override
 	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) {
 		double[] a = ColGroupConverter.getDenseVector(vector);
 		double[] c = result.getDenseBlockValues();

 		for (int row = 0; row < _numRows; row++) {
 			for (int col = 0; col < _colIndexes.length; col++) {
 				c[_colIndexes[col]] += (_values[index(row, col)] * _scale) * a[row];
 			}
 		}
 	}

 	@Override
 	public void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result) {
 		throw new NotImplementedException();
 	}

 	/**
 	 * Applies a scalar operation to every cell and returns the result as a new group.
 	 *
 	 * A multiplication only rescales and shares the byte array; any other operation decodes each cell,
 	 * applies the operation and quantizes again with a scale fitted to the new largest absolute value.
 	 */
 	@Override
 	public ColGroup scalarOperation(ScalarOperator op) {
 		if(op.fn instanceof Multiply) {
 			return new ColGroupQuan(_colIndexes, op.executeScalar(_scale), _values);
 		}
 		double[] temp = new double[_values.length];
 		// Track the largest magnitude, starting from 0 so the first cell is treated like all others.
 		double max = 0;
 		for (int i = 0; i < _values.length; i++) {
 			temp[i] = op.executeScalar((double) _values[i] * _scale);
 			double absTemp = Math.abs(temp[i]);
 			if(absTemp > max) {
 				max = absTemp;
 			}
 		}
 		// An all-zero result keeps scale 1, mirroring the constructor, instead of dividing by zero.
 		double newScale = (max == 0) ? 1 : max / (double) (Byte.MAX_VALUE);
 		byte[] newValues = new byte[_values.length];
 		for (int i = 0; i < _values.length; i++) {
 			newValues[i] = (byte) Math.round(temp[i] / newScale);
 		}

 		return new ColGroupQuan(_colIndexes, newScale, newValues);
 	}

 	@Override
 	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) {
 		unaryAggregateOperations(op, result, 0, getNumRows());
 	}

 	/**
 	 * Dispatches sum, sum of squares, min and max to the matching full, row-wise or column-wise routine.
 	 * Only the row-wise routines honor {@code rl} and {@code ru}.
 	 *
 	 * @throws DMLScriptException if the aggregation function is none of the supported ones
 	 */
 	@Override
 	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) {

 		if(op.aggOp.increOp.fn instanceof KahanPlus) {

 			// Not using KahnObject because we already lost some of that precision anyway in
 			// quantization.
 			if(op.indexFn instanceof ReduceAll)
 				computeSum(result);
 			else if(op.indexFn instanceof ReduceCol)
 				computeRowSums(result, rl, ru);
 			else if(op.indexFn instanceof ReduceRow)
 				computeColSums(result);
 		}
 		else if(op.aggOp.increOp.fn instanceof KahanPlusSq) {
 			if(op.indexFn instanceof ReduceAll)
 				computeSumSq(result);
 			else if(op.indexFn instanceof ReduceCol)
 				computeRowSumsSq(result, rl, ru);
 			else if(op.indexFn instanceof ReduceRow)
 				computeColSumsSq(result);
 		}
 		else if(op.aggOp.increOp.fn instanceof Builtin
 			&& (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX
 				|| ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN)) {
 			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
 			// min and max (reduceall/reducerow over tuples only)

 			if(op.indexFn instanceof ReduceAll)
 				computeMxx(result, builtin, _zeros);
 			else if(op.indexFn instanceof ReduceCol)
 				computeRowMxx(result, builtin, rl, ru);
 			else if(op.indexFn instanceof ReduceRow)
 				computeColMxx(result, builtin, _zeros);
 		}
 		else {
 			throw new DMLScriptException("Unknown UnaryAggregate operator on CompressedMatrixBlock");
 		}
 	}

 	/** Adds the sum of all decoded cells to the value at (0, 0) of {@code result}. */
 	protected void computeSum(MatrixBlock result) {
 		// TODO Potential speedup use vector instructions/group in batches of 32
 		long sum = 0L;
 		for (byte qVal : _values) {
 			sum += qVal;
 		}
 		result.quickSetValue(0, 0, result.getValue(0, 0) + (double) sum * _scale);
 	}

 	/** Adds the sum of the squared decoded cells to the value at (0, 0) of {@code result}. */
 	protected void computeSumSq(MatrixBlock result) {
 		double sumsq = 0;
 		for (byte qVal : _values) {
 			double v = qVal * _scale;
 			sumsq += v * v;
 		}
 		result.quickSetValue(0, 0, result.getValue(0, 0) + sumsq);
 	}

 	/** Adds, for each row in {@code [rl, ru)}, the sum of its decoded cells to column 0 of {@code result}. */
 	protected void computeRowSums(MatrixBlock result, int rl, int ru) {
 		for (int row = rl; row < ru; row++) {
 			// The bytes are summed exactly in a long and scaled once per row.
 			long rowSum = 0L;
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				rowSum += _values[index(row, colIx)];
 			}
 			result.quickSetValue(row, 0, result.getValue(row, 0) + (double) rowSum * _scale);
 		}
 	}

 	/**
 	 * Adds, for each row in {@code [rl, ru)}, the sum of its squared decoded cells to column 0 of
 	 * {@code result}.
 	 */
 	protected void computeRowSumsSq(MatrixBlock result, int rl, int ru) {
 		for (int row = rl; row < ru; row++) {
 			double rowSumSq = 0;
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				double v = (double) _values[index(row, colIx)] * _scale;
 				rowSumSq += v * v;
 			}
 			result.quickSetValue(row, 0, result.getValue(row, 0) + rowSumSq);
 		}
 	}

 	/** Stores the sum of each column's decoded cells in row 0 of {@code result} at that column's index. */
 	protected void computeColSums(MatrixBlock result) {
 		// A long holds the exact byte sum for any row count, so no size-specific variants are needed.
 		long[] colSums = new long[_colIndexes.length];
 		for (int i = 0; i < _values.length; i++) {
 			colSums[i / _numRows] += _values[i];
 		}

 		for (int col = 0; col < _colIndexes.length; col++) {
 			result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
 		}
 	}

 	/**
 	 * Stores the sum of each column's squared decoded cells in row 0 of {@code result} at that column's
 	 * index.
 	 */
 	protected void computeColSumsSq(MatrixBlock result) {
 		double[] sumsq = new double[_colIndexes.length];
 		for (int i = 0; i < _values.length; i++) {
 			double v = _values[i] * _scale;
 			sumsq[i / _numRows] += v * v;
 		}

 		for (int col = 0; col < _colIndexes.length; col++) {
 			result.quickSetValue(0, _colIndexes[col], sumsq[col]);
 		}
 	}

 	/**
 	 * Folds the decoded cells of each row in {@code [rl, ru)} into the matching entry of the result's dense
 	 * values with {@code builtin}.
 	 */
 	protected void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
 		double[] c = result.getDenseBlockValues();
 		for (int row = rl; row < ru; row++) {
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				double v = ((double) _values[index(row, colIx)]) * _scale;
 				c[row] = builtin.execute(c[row], v);
 			}
 		}
 	}

 	/**
 	 * Folds all decoded cells into the value at (0, 0) of {@code result} with {@code builtin}.
 	 *
 	 * The fold starts from the value already stored in {@code result}, as the row-wise variant does, so a
 	 * partial aggregate is extended rather than replaced by one seeded with 0. The {@code zeros} flag is
 	 * not used.
 	 */
 	protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
 		double res = result.getValue(0, 0);
 		for (int i = 0; i < _values.length; i++) {
 			res = builtin.execute(res, _values[i] * _scale);
 		}
 		result.quickSetValue(0, 0, res);
 	}

 	/**
 	 * Folds the decoded cells of each column with {@code builtin} into row 0 of {@code result} at that
 	 * column's index, starting from the value already stored there. The {@code zeros} flag is not used.
 	 */
 	protected void computeColMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
 		double[] colRes = new double[_colIndexes.length];
 		for (int col = 0; col < _colIndexes.length; col++) {
 			colRes[col] = result.getValue(0, _colIndexes[col]);
 		}
 		for (int i = 0; i < _values.length; i++) {
 			int col = i / _numRows;
 			colRes[col] = builtin.execute(colRes[col], _values[i] * _scale);
 		}

 		for (int col = 0; col < _colIndexes.length; col++) {
 			result.quickSetValue(0, _colIndexes[col], colRes[col]);
 		}
 	}

 	/** Returns an iterator whose methods are not implemented; all arguments are ignored. */
 	@Override
 	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
 		return new QuanValueIterator();
 	}

 	/** Placeholder cell iterator: every method throws {@link NotImplementedException}. */
 	private class QuanValueIterator implements Iterator<IJV> {

 		@Override
 		public boolean hasNext() {
 			throw new NotImplementedException("Not Implemented");
 		}

 		@Override
 		public IJV next() {
 			throw new NotImplementedException("Not Implemented");
 		}

 	}

 	/** Returns a row iterator whose {@code next} is not implemented; the arguments are ignored. */
 	@Override
 	public ColGroupRowIterator getRowIterator(int rl, int ru) {
 		return new QuanRowIterator();
 	}

 	/** Placeholder row iterator: {@code next} throws {@link NotImplementedException}. */
 	private class QuanRowIterator extends ColGroupRowIterator {

 		@Override
 		public void next(double[] buff, int rowIx, int segIx, boolean last) {
 			throw new NotImplementedException("Not Implemented");
 		}

 	}

 	/**
 	 * Adds, for each row in {@code [rl, ru)}, the number of non-zero quantized cells to
 	 * {@code rnnz[row - rl]}.
 	 */
 	@Override
 	public void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
 		for (int row = rl; row < ru; row++) {
 			int lnnz = 0;
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				if(_values[index(row, colIx)] != 0) {
 					lnnz++;
 				}
 			}
 			rnnz[row - rl] += lnnz;
 		}
 	}

 	/**
 	 * Returns the decoded cells as a block with {@code _numRows} rows and one column per column of this
 	 * group; column {@code colIx} of the block holds the group's column at position {@code colIx}.
 	 */
 	@Override
 	public MatrixBlock getValuesAsBlock() {
 		MatrixBlock target = new MatrixBlock(_numRows, _colIndexes.length, 0.0);
 		if(_values == null || _values.length == 0) {
 			return target;
 		}
 		// Write at group-local positions: the block has only as many columns as this group.
 		for (int row = 0; row < _numRows; row++) {
 			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				target.quickSetValue(row, colIx, _values[index(row, colIx)] * _scale);
 			}
 		}
 		return target;
 	}

 	/** Not supported by this group; always throws {@link DMLCompressionException}. */
 	@Override
 	public int[] getCounts() {
 		throw new DMLCompressionException(
 			"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
 	}

 	/** Not supported by this group; always throws {@link DMLCompressionException}. */
 	@Override
 	public int[] getCounts(boolean includeZero) {
 		throw new DMLCompressionException(
 			"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
 	}

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.sysds.runtime.compress.colgroup;

	import java.io.DataInput;
	import java.io.DataOutput;
	import java.io.IOException;
	import java.util.Arrays;
	import java.util.DoubleSummaryStatistics;
	import java.util.Iterator;

	import org.apache.commons.lang.NotImplementedException;
	import org.apache.sysds.runtime.DMLCompressionException;
	import org.apache.sysds.runtime.DMLRuntimeException;
	import org.apache.sysds.runtime.DMLScriptException;
	import org.apache.sysds.runtime.compress.UncompressedBitmap;
	import org.apache.sysds.runtime.functionobjects.Builtin;
	import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
	import org.apache.sysds.runtime.functionobjects.KahanPlus;
	import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
	import org.apache.sysds.runtime.functionobjects.Multiply;
	import org.apache.sysds.runtime.functionobjects.ReduceAll;
	import org.apache.sysds.runtime.functionobjects.ReduceCol;
	import org.apache.sysds.runtime.functionobjects.ReduceRow;
	import org.apache.sysds.runtime.matrix.data.IJV;
	import org.apache.sysds.runtime.matrix.data.MatrixBlock;
	import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
	import org.apache.sysds.runtime.matrix.operators.ScalarOperator;

	public class ColGroupQuan extends ColGroup {

	private static final long serialVersionUID = -9157476271360522008L;

	protected double _scale;
	protected byte[] _values;

	protected ColGroupQuan() {
	super();
	}

	protected ColGroupQuan(int[] colIndexes, int numRows, UncompressedBitmap ubm) {
	super(colIndexes, numRows);
	_values = new byte[ubm.getNumColumns() * numRows];

	double[] valuesFullPrecision = ubm.getValues();
	DoubleSummaryStatistics stat = Arrays.stream(valuesFullPrecision).summaryStatistics();
	double max = Math.abs(Math.max(stat.getMax(), Math.abs(stat.getMin())));
	if(Double.isInfinite(max)){
	throw new DMLCompressionException("Invalid ColGroupQuan, can't quantize Infinite value.");
	} else if (max == 0){
	_scale = 1;
	LOG.error("ColGroup! column with only 0 values good excuse to make new ColGroup");
	} else{
	_scale = max / (double) (Byte.MAX_VALUE);
	}
	for (int i = 0; i < valuesFullPrecision.length; i++) {
	int[] runs = ubm.getOffsetsList(i).extractValues();
	double curV = valuesFullPrecision[i];
	double scaledVal = curV / _scale;
	if(Double.isNaN(scaledVal) \|\| Double.isInfinite(scaledVal)){
	throw new DMLRuntimeException("Something went wrong in scaling values");
	}
	byte scaledValQuan = (byte) (scaledVal);
	for (int j = 0; j < ubm.getOffsetsList(i).size(); j++) {
	_values[runs[j]] = scaledValQuan;
	}
	}
	}

	@Override
	public boolean getIfCountsType(){
	return false;
	}

	private ColGroupQuan(int[] colIndexes, double scale, byte[] values) {
	super(colIndexes, values.length / colIndexes.length);
	this._scale = scale;
	this._values = values;
	}

	@Override
	public CompressionType getCompType() {
	return CompressionType.QUAN;
	}

	@Override
	protected ColGroupType getColGroupType() {
	return ColGroupType.QUAN8S;
	}

	@Override
	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
	if (_values == null \|\| _values.length == 0) {
	return;
	}
	for (int row = rl; row < ru; row++) {
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	int col = _colIndexes[colIx];
	byte qVal = _values[row * colIx + row];
	double val = qVal * _scale;
	target.quickSetValue(row, col, val);
	}
	}
	}

	@Override
	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
	if (_values == null \|\| _values.length == 0) {
	return;
	}
	for (int row = 0; row < _numRows; row++) {
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	int col = _colIndexes[colIx];
	double val = _values[row * colIx + row] * _scale;
	target.quickSetValue(row, col, val);
	}
	}
	}

	@Override
	public void decompressToBlock(MatrixBlock target, int colpos) {
	if (_values == null \|\| _values.length == 0)
	return;

	/**
	* target.getDenseBlockValues() because this decompress is used for
	* TransposeSelfMatrixMult meaning that the result is allocated directly into
	* the result row or col matrix with the same code !
	*/
	// double[] c = target.getDenseBlockValues();

	// for (int row = 0; row < _numRows; row++) {
	// c[row] = (double)_values[row * colpos + row] * _scale;
	// }
	// target.setNonZeros(_numRows);

	double[] c = target.getDenseBlockValues();
	int nnz = 0;

	for (int row = 0; row < _numRows; row++) {
	double val = _values[row * colpos + row];
	if (val != 0) {
	nnz++;
	}
	c[row] = val * _scale;
	}
	target.setNonZeros(nnz);
	}

	@Override
	public void write(DataOutput out) throws IOException {

	out.writeInt(_numRows);
	out.writeInt(_colIndexes.length);

	for (int i = 0; i < _colIndexes.length; i++)
	out.writeInt(_colIndexes[i]);

	for (int i = 0; i < _values.length; i++)
	out.writeByte(_values[i]);

	out.writeDouble(_scale);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
	_numRows = in.readInt();
	int numCols = in.readInt();

	_colIndexes = new int[numCols];
	for (int i = 0; i < _colIndexes.length; i++)
	_colIndexes[i] = in.readInt();

	_values = new byte[_numRows * numCols];
	for (int i = 0; i < _values.length; i++)
	_values[i] = in.readByte();

	_scale = in.readDouble();
	}

	@Override
	public long getExactSizeOnDisk() {
	long ret = 8; // header
	ret += 4 * _colIndexes.length;
	ret += _values.length;
	return ret;
	}

	@Override
	public double get(int r, int c) {
	int colIx = Arrays.binarySearch(_colIndexes, c);
	return _values[r * colIx + r] * _scale;
	}

	@Override
	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) {
	double[] b = ColGroupConverter.getDenseVector(vector);
	double[] c = result.getDenseBlockValues();

	// prepare reduced rhs w/ relevant values
	double[] sb = new double[_colIndexes.length];
	for (int j = 0; j < _colIndexes.length; j++) {
	sb[j] = b[_colIndexes[j]];
	}

	for (int row = rl; row < ru; row++) {
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	c[row] += (_values[row * colIx + row] * _scale) * sb[colIx];
	}
	}
	}

	@Override
	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) {
	double[] a = ColGroupConverter.getDenseVector(vector);
	double[] c = result.getDenseBlockValues();

	for (int row = 0; row < _numRows; row++) {
	double val = _values[row] * _scale;
	for (int col = 0; col < _colIndexes.length; col++) {
	double value = val * a[row * col + row];
	c[_colIndexes[col]] += value;
	}
	}

	}

	@Override
	public void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result) {
	throw new NotImplementedException();
	}

	@Override
	public ColGroup scalarOperation(ScalarOperator op) {
	if (op.fn instanceof Multiply) {
	return new ColGroupQuan(_colIndexes, op.executeScalar(_scale), _values);
	}
	double[] temp = new double[_values.length];
	double max = op.executeScalar((double)_values[0] * _scale);
	temp[0] = max;
	for (int i = 1; i < _values.length; i++) {
	temp[i] = op.executeScalar((double)_values[i] * _scale);
	double absTemp = Math.abs(temp[i]);
	if (absTemp > max) {
	max = absTemp;
	}
	}
	byte[] newValues = new byte[_values.length];
	double newScale = max / (double) (Byte.MAX_VALUE);
	for (int i = 0; i < _values.length; i++) {
	newValues[i] = (byte) ((double)temp[i] / newScale);
	}

	return new ColGroupQuan(_colIndexes, newScale, newValues);
	}

	@Override
	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) {
	unaryAggregateOperations(op, result, 0, getNumRows());
	}

	@Override
	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) {

	if (op.aggOp.increOp.fn instanceof KahanPlus) {

	// Not using KahnObject because we already lost some of that precision anyway in
	// quantization.
	if (op.indexFn instanceof ReduceAll)
	computeSum(result);
	else if (op.indexFn instanceof ReduceCol)
	computeRowSums(result, rl, ru);
	else if (op.indexFn instanceof ReduceRow)
	computeColSums(result);
	} else if (op.aggOp.increOp.fn instanceof KahanPlusSq) {
	if (op.indexFn instanceof ReduceAll)
	computeSumSq(result);
	else if (op.indexFn instanceof ReduceCol)
	computeRowSumsSq(result, rl, ru);
	else if (op.indexFn instanceof ReduceRow)
	computeColSumsSq(result);
	} else if (op.aggOp.increOp.fn instanceof Builtin
	&& (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX
	\|\| ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN)) {
	Builtin builtin = (Builtin) op.aggOp.increOp.fn;
	// min and max (reduceall/reducerow over tuples only)

	if (op.indexFn instanceof ReduceAll)
	computeMxx(result, builtin, _zeros);
	else if (op.indexFn instanceof ReduceCol)
	computeRowMxx(result, builtin, rl, ru);
	else if (op.indexFn instanceof ReduceRow)
	computeColMxx(result, builtin, _zeros);
	} else {
	throw new DMLScriptException("Unknown UnaryAggregate operator on CompressedMatrixBlock");
	}
	}

	protected void computeSum(MatrixBlock result) {
	// TODO Potential speedup use vector instructions/group in batches of 32
	long sum = 0L;
	for (int i = 0; i < _values.length; i++) {
	sum += (long) _values[i];
	}
	result.quickSetValue(0, 0, result.getValue(0, 0) + (double) sum * _scale);
	}

	protected void computeSumSq(MatrixBlock result) {

	double sumsq = 0;
	for (int i = 0; i < _values.length; i++) {
	double v = _values[i] * _scale;
	sumsq += v*v;
	}
	result.quickSetValue(0, 0, result.getValue(0, 0) + sumsq);
	}

	protected void computeRowSums(MatrixBlock result, int rl, int ru) {
	if (_colIndexes.length < 256) {
	short[] rowSums = new short[ru - rl];
	for (int row = rl; row < ru; row++) {
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	rowSums[row - rl] += _values[row * colIx + row];
	}
	}
	for (int row = rl; row < ru; row++) {
	result.quickSetValue(row, 0, result.getValue(row, 0) + (double) rowSums[row - rl] * _scale);
	}
	} else {
	throw new NotImplementedException("Not Implemented number of columns in ColGroupQuan row sum");
	}
	}

	protected void computeRowSumsSq(MatrixBlock result, int rl, int ru) {
	if (_colIndexes.length < 256) {
	float[] rowSumSq = new float[ru - rl];
	for (int row = rl; row < ru; row++) {
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	double v = (double) _values[row * colIx + row] * _scale;
	rowSumSq[row - rl] += v*v;
	}
	}

	for (int row = rl; row < ru; row++) {
	result.quickSetValue(row, 0, result.getValue(row, 0) + rowSumSq[row - rl]);
	}

	} else {
	throw new NotImplementedException("Not Implemented number of columns in ColGroupQuan row sum");
	}
	}

	protected void computeColSums(MatrixBlock result) {
	if (_numRows < 256) {
	short[] colSums = new short[_colIndexes.length];
	for (int i = 0; i < _values.length; i++) {
	colSums[i / _numRows] += _values[i];
	}

	for (int col = 0; col < _colIndexes.length; col++) {
	result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
	}
	} else if (_numRows < 16777216) { // (Int max + 1) / (short max + 1)
	int[] colSums = new int[_colIndexes.length];
	for (int i = 0; i < _values.length; i++) {
	colSums[i / _numRows] += _values[i];
	}

	for (int col = 0; col < _colIndexes.length; col++) {
	result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
	}
	} else {
	double[] colSums = new double[_colIndexes.length];
	for (int i = 0; i < _values.length; i++) {
	colSums[i / _numRows] += _values[i];
	}

	for (int col = 0; col < _colIndexes.length; col++) {
	result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
	}
	}
	}

	protected void computeColSumsSq(MatrixBlock result) {

	double[] sumsq = new double[_colIndexes.length];
	for (int i = 0; i < _values.length; i++) {
	double v = _values[i] * _scale;
	sumsq[i / _numRows] += v*v;
	}

	for (int col = 0; col < _colIndexes.length; col++) {
	result.quickSetValue(0, _colIndexes[col], sumsq[col]);
	}

	}

	protected void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
	double[] c = result.getDenseBlockValues();
	for (int row = rl; row < ru; row++) {
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	double v = ((double)_values[row * colIx + row]) * _scale;
	// System.out.println(v);
	c[row] = builtin.execute(c[row], v);
	}
	}

	}

	protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) {

	double res = 0;
	for (int i = 0; i < _values.length; i++) {
	res = builtin.execute(res, _values[i] * _scale);
	}
	result.quickSetValue(0, 0, res);
	}

	protected void computeColMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
	double[] colRes = new double[_colIndexes.length];
	for (int i = 0; i < _values.length; i++) {
	colRes[i / _numRows] = builtin.execute(colRes[i / _numRows], _values[i] * _scale);
	}

	for (int col = 0; col < _colIndexes.length; col++) {
	result.quickSetValue(0, _colIndexes[col], colRes[col]);
	}
	}

	@Override
	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
	return new QuanValueIterator();
	}

	private class QuanValueIterator implements Iterator<IJV> {

	@Override
	public boolean hasNext() {
	throw new NotImplementedException("Not Implemented");
	}

	@Override
	public IJV next() {
	throw new NotImplementedException("Not Implemented");
	}

	}

	@Override
	public ColGroupRowIterator getRowIterator(int rl, int ru) {

	return new QuanRowIterator();
	}

	private class QuanRowIterator extends ColGroupRowIterator {

	@Override
	public void next(double[] buff, int rowIx, int segIx, boolean last) {
	throw new NotImplementedException("Not Implemented");
	}

	}

	@Override
	public void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
	// TODO Auto-generated method stub
	for (int row = rl; row < ru; row++) {
	int lnnz = 0;
	for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
	lnnz += (_values[row * colIx + row] != 0) ? 1 : 0;
	}
	rnnz[row - rl] += lnnz;
	}
	}

	@Override
	public MatrixBlock getValuesAsBlock() {
	// TODO Auto-generated method stub
	MatrixBlock target = new MatrixBlock(_numRows, _colIndexes.length, 0.0);
	decompressToBlock(target, _colIndexes);
	return target;
	}

	@Override
	public int[] getCounts() {
	throw new DMLCompressionException(
	"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
	}

	@Override
	public int[] getCounts(boolean includeZero) {
	throw new DMLCompressionException(
	"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
	}

	}