[SYSTEMDS-372] ColGroup Base Data to Byte

This commit enables all col groups to change fundamental data to byte
by applying a scaling value.

The current setup scales to the range -127 to 127, to enable encoding
negative numbers.

This commit also include fixes to the count distinct algorithm.

- Distinct count for compressed Matrix
- fix support for multiple blocks in input count distinct
- Extended CountDistinct Tests
- Cleanup CountDistinct
- Lossy compression setting
- Relocation of bitmap
- Compression Settings Now include CoCoding planning
- add comments on compression Settings
- add docs for CompressionSettingsBuilder
- parse Valid ColGroups from Settings file
- Update Sum operation to leverage Quantized values
diff --git a/dev/Tasks-obsolete.txt b/dev/Tasks-obsolete.txt
index 7d30fdc..aa5afc7 100644
--- a/dev/Tasks-obsolete.txt
+++ b/dev/Tasks-obsolete.txt
@@ -246,14 +246,14 @@
  * 273a Redesign allocation of ColGroups in ColGroupFactory
  * 274 Make the DDC Compression dictionary share correctly            OK
  * 275 Include compressionSettings in DMLConfiguration
- * 276 Allow Uncompressed Columns to be in sparse formats
+ * 276 Allow Uncompressed Columns to be in sparse formats             OK
  * 277 Sampling based estimators fix
  * 278 Compression-CoCode algorithm optimization
- * 278a Return ColGroups estimated compression ratio to Factory
+ * 278a Return ColGroups estimated compression ratio to Factory       OK
  * 279 Add missing standard lossless compression techniques
  * 279a ColGroup FOR (Frame of reference) encoding
  * 279b ColGroup DEL (Delta) encoding
- * MINOR Reduce memory usage for compression statistics.
+ * MINOR Reduce memory usage for compression statistics.              OK
  * MINOR Make ContainsAZero() method in UncompressedBitMap
 
 SYSTEMDS-280 New Rewrites
@@ -321,7 +321,7 @@
 
 SYSTEMDS-370 Lossy Compression Blocks
  * 371 ColGroup Quantization                                          OK (Naive Q8)
- * 321 ColGroup Base Data change (from Double to ??)
+ * 372 ColGroup Base Data change (from Double to Naive Q8)            OK
 
 SYSTEMDS-380 Memory Footprint
  * 381 Matrix Block Memory footprint update
diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java b/src/main/java/org/apache/sysds/conf/DMLConfig.java
index 74d4457..184e50f 100644
--- a/src/main/java/org/apache/sysds/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java
@@ -67,6 +67,8 @@
 	public static final String CP_PARALLEL_OPS      = "sysds.cp.parallel.ops";
 	public static final String CP_PARALLEL_IO       = "sysds.cp.parallel.io";
 	public static final String COMPRESSED_LINALG    = "sysds.compressed.linalg"; //auto, true, false
+	public static final String COMPRESSED_LOSSY     = "sysds.compressed.lossy";
+	public static final String COMPRESSED_VALID_COMPRESSIONS = "sysds.compressed.valid.compressions";
 	public static final String NATIVE_BLAS          = "sysds.native.blas";
 	public static final String NATIVE_BLAS_DIR      = "sysds.native.blas.directory";
 	public static final String CODEGEN              = "sysds.codegen.enabled"; //boolean
@@ -113,6 +115,8 @@
 		_defaultVals.put(CP_PARALLEL_OPS,        "true" );
 		_defaultVals.put(CP_PARALLEL_IO,         "true" );
 		_defaultVals.put(COMPRESSED_LINALG,      Compression.CompressConfig.AUTO.name() );
+		_defaultVals.put(COMPRESSED_LOSSY,       "false" );
+		_defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "DDC,OLE,RLE");
 		_defaultVals.put(CODEGEN,                "false" );
 		_defaultVals.put(CODEGEN_COMPILER,       CompilerType.AUTO.name() );
 		_defaultVals.put(CODEGEN_OPTIMIZER,      PlanSelector.FUSE_COST_BASED_V2.name() );
@@ -374,7 +378,7 @@
 		String[] tmpConfig = new String[] { 
 			LOCAL_TMP_DIR,SCRATCH_SPACE,OPTIMIZATION_LEVEL, DEFAULT_BLOCK_SIZE,
 			CP_PARALLEL_OPS, CP_PARALLEL_IO, NATIVE_BLAS, NATIVE_BLAS_DIR,
-			COMPRESSED_LINALG,
+			COMPRESSED_LINALG, COMPRESSED_LOSSY, COMPRESSED_VALID_COMPRESSIONS,
 			CODEGEN, CODEGEN_COMPILER, CODEGEN_OPTIMIZER, CODEGEN_PLANCACHE, CODEGEN_LITERALS,
 			STATS_MAX_WRAP_LEN, PRINT_GPU_MEMORY_INFO,
 			AVAILABLE_GPUS, SYNCHRONIZE_GPU, EAGER_CUDA_FREE, FLOATING_POINT_PRECISION, GPU_EVICTION_POLICY, 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/AbstractCompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/AbstractCompressedMatrixBlock.java
index 2e5b6c7..3277ae4 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/AbstractCompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/AbstractCompressedMatrixBlock.java
@@ -60,6 +60,7 @@
 
 	protected List<ColGroup> _colGroups;
 
+
 	/**
 	 * Constructor for building an empty Compressed Matrix block object.
 	 */
@@ -95,14 +96,10 @@
 		nonZeros = that.getNonZeros();
 	}
 
-	public abstract boolean isCompressed();
-
 	public abstract MatrixBlock decompress();
 
 	@Override
 	public boolean isEmptyBlock(boolean safe) {
-		if(!isCompressed())
-			return super.isEmptyBlock(safe);
 		return(_colGroups == null || getNonZeros() == 0);
 	}
 
@@ -117,10 +114,8 @@
 		size += 4; // clen
 		size += 1; // a single boolean fills 8 bits !
 		size += 8; // NonZeros.
-
 		size += 8; // Object reference DenseBlock
 		size += 8; // Object reference Sparse Block
-
 		size += 4; // estimated NNzs Per Row
 
 		if(size % 8 != 0)
@@ -135,14 +130,14 @@
 	@Override
 	public MatrixBlock unaryOperations(UnaryOperator op, MatrixValue result) {
 		printDecompressWarning("unaryOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp =  decompress();
 		return tmp.unaryOperations(op, result);
 	}
 
 	@Override
 	public MatrixBlock binaryOperations(BinaryOperator op, MatrixValue thatValue, MatrixValue result) {
 		printDecompressWarning("binaryOperations", (MatrixBlock) thatValue);
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left =  decompress();
 		MatrixBlock right = getUncompressed(thatValue);
 		return left.binaryOperations(op, right, result);
 	}
@@ -150,7 +145,7 @@
 	@Override
 	public MatrixBlock binaryOperationsInPlace(BinaryOperator op, MatrixValue thatValue) {
 		printDecompressWarning("binaryOperationsInPlace", (MatrixBlock) thatValue);
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left =  decompress();
 		MatrixBlock right = getUncompressed(thatValue);
 		left.binaryOperationsInPlace(op, right);
 		return this;
@@ -170,7 +165,7 @@
 	@Override
 	public MatrixBlock reorgOperations(ReorgOperator op, MatrixValue ret, int startRow, int startColumn, int length) {
 		printDecompressWarning("reorgOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.reorgOperations(op, ret, startRow, startColumn, length);
 	}
 
@@ -179,7 +174,7 @@
 		if(cbind) // use supported operation
 			return append(that, ret);
 		printDecompressWarning("append-rbind", that);
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(that);
 		return left.append(right, ret, cbind);
 	}
@@ -188,7 +183,7 @@
 	public void append(MatrixValue v2, ArrayList<IndexedMatrixValue> outlist, int blen, boolean cbind, boolean m2IsLast,
 		int nextNCol) {
 		printDecompressWarning("append", (MatrixBlock) v2);
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(v2);
 		left.append(right, outlist, blen, cbind, m2IsLast, nextNCol);
 	}
@@ -201,7 +196,7 @@
 	@Override
 	public void permutationMatrixMultOperations(MatrixValue m2Val, MatrixValue out1Val, MatrixValue out2Val, int k) {
 		printDecompressWarning("permutationMatrixMultOperations", (MatrixBlock) m2Val);
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(m2Val);
 		left.permutationMatrixMultOperations(right, out1Val, out2Val, k);
 	}
@@ -210,7 +205,7 @@
 	public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int rl, int ru, int cl, int cu, MatrixBlock ret,
 		UpdateType update) {
 		printDecompressWarning("leftIndexingOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(rhsMatrix);
 		return left.leftIndexingOperations(right, rl, ru, cl, cu, ret, update);
 	}
@@ -218,14 +213,14 @@
 	@Override
 	public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, int cl, MatrixBlock ret, UpdateType update) {
 		printDecompressWarning("leftIndexingOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.leftIndexingOperations(scalar, rl, cl, ret, update);
 	}
 
 	@Override
-	public MatrixBlock slice(int rl, int ru, int cl, int cu, CacheBlock ret) {
+	public MatrixBlock slice(int rl, int ru, int cl, int cu, boolean deep, CacheBlock ret) {
 		printDecompressWarning("slice");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.slice(rl, ru, cl, cu, ret);
 	}
 
@@ -234,7 +229,7 @@
 		int boundaryRlen, int boundaryClen) {
 		printDecompressWarning("slice");
 		try {
-			MatrixBlock tmp = isCompressed() ? decompress() : this;
+			MatrixBlock tmp = decompress();
 			tmp.slice(outlist, range, rowCut, colCut, blen, boundaryRlen, boundaryClen);
 		}
 		catch(DMLRuntimeException ex) {
@@ -245,20 +240,19 @@
 	@Override
 	public MatrixBlock zeroOutOperations(MatrixValue result, IndexRange range, boolean complementary) {
 		printDecompressWarning("zeroOutOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.zeroOutOperations(result, range, complementary);
 	}
 
 	@Override
 	public CM_COV_Object cmOperations(CMOperator op) {
 		printDecompressWarning("cmOperations");
-		if(!isCompressed() || isEmptyBlock())
+		if(isEmptyBlock())
 			return super.cmOperations(op);
 		ColGroup grp = _colGroups.get(0);
-
 		MatrixBlock vals = grp.getValuesAsBlock();
 		if(grp.getIfCountsType()){
-			MatrixBlock counts = ColGroupValue.getCountsAsBlock(grp.getCounts(true));
+			MatrixBlock counts = ColGroupValue.getCountsAsBlock(grp.getCounts());
 			return vals.cmOperations(op, counts);
 		}else{
 			return vals.cmOperations(op);
@@ -269,7 +263,7 @@
 	public CM_COV_Object cmOperations(CMOperator op, MatrixBlock weights) {
 		printDecompressWarning("cmOperations");
 		MatrixBlock right = getUncompressed(weights);
-		if(!isCompressed() || isEmptyBlock())
+		if(isEmptyBlock())
 			return super.cmOperations(op, right);
 		ColGroup grp = _colGroups.get(0);
 		if(grp instanceof ColGroupUncompressed)
@@ -280,7 +274,7 @@
 	@Override
 	public CM_COV_Object covOperations(COVOperator op, MatrixBlock that) {
 		printDecompressWarning("covOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(that);
 		return left.covOperations(op, right);
 	}
@@ -288,7 +282,7 @@
 	@Override
 	public CM_COV_Object covOperations(COVOperator op, MatrixBlock that, MatrixBlock weights) {
 		printDecompressWarning("covOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right1 = getUncompressed(that);
 		MatrixBlock right2 = getUncompressed(weights);
 		return left.covOperations(op, right1, right2);
@@ -298,15 +292,13 @@
 	public MatrixBlock sortOperations(MatrixValue weights, MatrixBlock result) {
 		printDecompressWarning("sortOperations");
 		MatrixBlock right = getUncompressed(weights);
-		if(!isCompressed())
-			return super.sortOperations(right, result);
 		ColGroup grp = _colGroups.get(0);
 		if(grp.getIfCountsType() != true)
 			return grp.getValuesAsBlock().sortOperations(right, result);
 
 		if(right == null) {
 			MatrixBlock vals = grp.getValuesAsBlock();
-			int[] counts = grp.getCounts(true);
+			int[] counts = grp.getCounts();
 			double[] data = (vals.getDenseBlock() != null) ? vals.getDenseBlockValues() : null;
 			SortUtils.sortByValue(0, vals.getNumRows(), data, counts);
 			MatrixBlock counts2 = ColGroupValue.getCountsAsBlock(counts);
@@ -320,7 +312,7 @@
 	public MatrixBlock aggregateBinaryOperations(MatrixIndexes m1Index, MatrixBlock m1Value, MatrixIndexes m2Index,
 		MatrixBlock m2Value, MatrixBlock result, AggregateBinaryOperator op) {
 		printDecompressWarning("aggregateBinaryOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(m2Value);
 		return left.aggregateBinaryOperations(m1Index, left, m2Index, right, result, op);
 	}
@@ -329,7 +321,7 @@
 	public MatrixBlock aggregateTernaryOperations(MatrixBlock m1, MatrixBlock m2, MatrixBlock m3, MatrixBlock ret,
 		AggregateTernaryOperator op, boolean inCP) {
 		printDecompressWarning("aggregateTernaryOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right1 = getUncompressed(m2);
 		MatrixBlock right2 = getUncompressed(m3);
 		return left.aggregateTernaryOperations(left, right1, right2, ret, op, inCP);
@@ -339,7 +331,7 @@
 	public MatrixBlock uaggouterchainOperations(MatrixBlock mbLeft, MatrixBlock mbRight, MatrixBlock mbOut,
 		BinaryOperator bOp, AggregateUnaryOperator uaggOp) {
 		printDecompressWarning("uaggouterchainOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(mbRight);
 		return left.uaggouterchainOperations(left, right, mbOut, bOp, uaggOp);
 	}
@@ -354,7 +346,7 @@
 	public MatrixBlock groupedAggOperations(MatrixValue tgt, MatrixValue wghts, MatrixValue ret, int ngroups,
 		Operator op, int k) {
 		printDecompressWarning("groupedAggOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(wghts);
 		return left.groupedAggOperations(left, right, ret, ngroups, op, k);
 	}
@@ -362,14 +354,14 @@
 	@Override
 	public MatrixBlock removeEmptyOperations(MatrixBlock ret, boolean rows, boolean emptyReturn, MatrixBlock select) {
 		printDecompressWarning("removeEmptyOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.removeEmptyOperations(ret, rows, emptyReturn, select);
 	}
 
 	@Override
 	public MatrixBlock removeEmptyOperations(MatrixBlock ret, boolean rows, boolean emptyReturn) {
 		printDecompressWarning("removeEmptyOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.removeEmptyOperations(ret, rows, emptyReturn);
 	}
 
@@ -377,14 +369,14 @@
 	public MatrixBlock rexpandOperations(MatrixBlock ret, double max, boolean rows, boolean cast, boolean ignore,
 		int k) {
 		printDecompressWarning("rexpandOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.rexpandOperations(ret, max, rows, cast, ignore, k);
 	}
 
 	@Override
 	public MatrixBlock replaceOperations(MatrixValue result, double pattern, double replacement) {
 		printDecompressWarning("replaceOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		return tmp.replaceOperations(result, pattern, replacement);
 	}
 
@@ -392,7 +384,7 @@
 	public void ctableOperations(Operator op, double scalar, MatrixValue that, CTableMap resultMap,
 		MatrixBlock resultBlock) {
 		printDecompressWarning("ctableOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(that);
 		left.ctableOperations(op, scalar, right, resultMap, resultBlock);
 	}
@@ -401,7 +393,7 @@
 	public void ctableOperations(Operator op, double scalar, double scalar2, CTableMap resultMap,
 		MatrixBlock resultBlock) {
 		printDecompressWarning("ctableOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		tmp.ctableOperations(op, scalar, scalar2, resultMap, resultBlock);
 	}
 
@@ -409,7 +401,7 @@
 	public void ctableOperations(Operator op, MatrixIndexes ix1, double scalar, boolean left, int brlen,
 		CTableMap resultMap, MatrixBlock resultBlock) {
 		printDecompressWarning("ctableOperations");
-		MatrixBlock tmp = isCompressed() ? decompress() : this;
+		MatrixBlock tmp = decompress();
 		tmp.ctableOperations(op, ix1, scalar, left, brlen, resultMap, resultBlock);
 	}
 
@@ -417,7 +409,7 @@
 	public void ctableOperations(Operator op, MatrixValue that, double scalar, boolean ignoreZeros, CTableMap resultMap,
 		MatrixBlock resultBlock) {
 		printDecompressWarning("ctableOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right = getUncompressed(that);
 		left.ctableOperations(op, right, scalar, ignoreZeros, resultMap, resultBlock);
 	}
@@ -432,7 +424,7 @@
 	@Override
 	public void ctableOperations(Operator op, MatrixValue that, MatrixValue that2, CTableMap resultMap) {
 		printDecompressWarning("ctableOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right1 = getUncompressed(that);
 		MatrixBlock right2 = getUncompressed(that2);
 		left.ctableOperations(op, right1, right2, resultMap);
@@ -442,7 +434,7 @@
 	public void ctableOperations(Operator op, MatrixValue that, MatrixValue that2, CTableMap resultMap,
 		MatrixBlock resultBlock) {
 		printDecompressWarning("ctableOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right1 = getUncompressed(that);
 		MatrixBlock right2 = getUncompressed(that2);
 		left.ctableOperations(op, right1, right2, resultMap, resultBlock);
@@ -451,7 +443,7 @@
 	@Override
 	public MatrixBlock ternaryOperations(TernaryOperator op, MatrixBlock m2, MatrixBlock m3, MatrixBlock ret) {
 		printDecompressWarning("ternaryOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right1 = getUncompressed(m2);
 		MatrixBlock right2 = getUncompressed(m3);
 		return left.ternaryOperations(op, right1, right2, ret);
@@ -467,7 +459,7 @@
 	public MatrixBlock quaternaryOperations(QuaternaryOperator qop, MatrixBlock um, MatrixBlock vm, MatrixBlock wm,
 		MatrixBlock out, int k) {
 		printDecompressWarning("quaternaryOperations");
-		MatrixBlock left = isCompressed() ? decompress() : this;
+		MatrixBlock left = decompress();
 		MatrixBlock right1 = getUncompressed(um);
 		MatrixBlock right2 = getUncompressed(vm);
 		MatrixBlock right3 = getUncompressed(wm);
@@ -491,7 +483,7 @@
 	}
 
 	private static boolean isCompressed(MatrixBlock mb) {
-		return(mb instanceof CompressedMatrixBlock && ((CompressedMatrixBlock) mb).isCompressed());
+		return(mb instanceof CompressedMatrixBlock);
 	}
 
 	private static MatrixBlock getUncompressed(MatrixValue mVal) {
@@ -499,15 +491,29 @@
 	}
 
 	private void printDecompressWarning(String operation) {
-		if(isCompressed()) {
-			LOG.warn("Operation '" + operation + "' not supported yet - decompressing for ULA operations.");
-		}
+		LOG.warn("Operation '" + operation + "' not supported yet - decompressing for ULA operations.");
+		
 	}
 
 	private void printDecompressWarning(String operation, MatrixBlock m2) {
-		if(isCompressed() || isCompressed(m2)) {
+		if(isCompressed(m2)) {
 			LOG.warn("Operation '" + operation + "' not supported yet - decompressing for ULA operations.");
 		}
 	}
 
+
+	@Override
+	public boolean isShallowSerialize() {
+		return false;
+	}
+
+	@Override
+	public boolean isShallowSerialize(boolean inclConvert) {
+		return false;
+	}
+
+	@Override
+	public void toShallowSerializeBlock() {
+		// do nothing
+	}
 }
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysds/runtime/compress/BitmapEncoder.java
index 9f1a7d0..a7f3f74 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/BitmapEncoder.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/BitmapEncoder.java
@@ -19,8 +19,9 @@
 
 package org.apache.sysds.runtime.compress;
 
-import java.util.ArrayList;
-
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
+import org.apache.sysds.runtime.compress.utils.Bitmap;
+import org.apache.sysds.runtime.compress.utils.BitmapLossy;
 import org.apache.sysds.runtime.compress.utils.DblArray;
 import org.apache.sysds.runtime.compress.utils.DblArrayIntListHashMap;
 import org.apache.sysds.runtime.compress.utils.DoubleIntListHashMap;
@@ -32,15 +33,7 @@
  * Static functions for encoding bitmaps in various ways.
  */
 public class BitmapEncoder {
-	/** Size of the blocks used in a blocked bitmap representation. */
-	// Note it is one more than Character.MAX_VALUE.
-	public static final int BITMAP_BLOCK_SZ = 65536;
 
-	public static boolean MATERIALIZE_ZEROS = false;
-
-	public static int getAlignedBlocksize(int blklen) {
-		return blklen + ((blklen % BITMAP_BLOCK_SZ != 0) ? BITMAP_BLOCK_SZ - blklen % BITMAP_BLOCK_SZ : 0);
-	}
 
 	/**
 	 * Generate uncompressed bitmaps for a set of columns in an uncompressed matrix block.
@@ -50,215 +43,65 @@
 	 * @param compSettings The compression settings used for the compression.
 	 * @return uncompressed bitmap representation of the columns
 	 */
-	public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawBlock,
+	public static AbstractBitmap extractBitmap(int[] colIndices, MatrixBlock rawBlock,
 		CompressionSettings compSettings) {
 		// note: no sparse column selection reader because low potential
 		// single column selection
+		Bitmap res = null;
 		if(colIndices.length == 1) {
-			return extractBitmap(colIndices[0], rawBlock, !MATERIALIZE_ZEROS, compSettings);
+			res = extractBitmap(colIndices[0], rawBlock, compSettings);
 		}
 		// multiple column selection (general case)
 		else {
 			ReaderColumnSelection reader = null;
 			if(rawBlock.isInSparseFormat() && compSettings.transposeInput)
-				reader = new ReaderColumnSelectionSparse(rawBlock, colIndices, !MATERIALIZE_ZEROS, compSettings);
+				reader = new ReaderColumnSelectionSparse(rawBlock, colIndices, compSettings);
 			else
-				reader = new ReaderColumnSelectionDense(rawBlock, colIndices, !MATERIALIZE_ZEROS, compSettings);
+				reader = new ReaderColumnSelectionDense(rawBlock, colIndices, compSettings);
 
-			return extractBitmap(colIndices, rawBlock, reader);
+			res = extractBitmap(colIndices, rawBlock, reader);
 		}
-	}
-
-	public static UncompressedBitmap extractBitmapFromSample(int[] colIndices, MatrixBlock rawBlock,
-		int[] sampleIndexes, CompressionSettings compSettings) {
-		// note: no sparse column selection reader because low potential
-
-		// single column selection
-		if(colIndices.length == 1) {
-			return extractBitmap(colIndices[0], rawBlock, sampleIndexes, !MATERIALIZE_ZEROS, compSettings);
+		if(compSettings.lossy) {
+			return BitmapLossy.makeBitmapLossy(res);
 		}
-		// multiple column selection (general case)
 		else {
-			return extractBitmap(colIndices,
-				rawBlock,
-				new ReaderColumnSelectionDenseSample(rawBlock, colIndices, sampleIndexes, !MATERIALIZE_ZEROS,
-					compSettings));
+			return res;
 		}
 	}
 
 	/**
-	 * Encodes the bitmap as a series of run lengths and offsets.
+	 * Extract Bitmap from a single column. It will always skip all zero values. It also counts the instances of zero.
 	 * 
-	 * @param offsets uncompressed offset list
-	 * @param len     logical length of the given offset list
-	 * @return compressed version of said bitmap
+	 * @param colIndex     The index of the column
+	 * @param rawBlock     The Raw matrix block (that can be transposed)
+	 * @param compSettings The Compression settings used, in this instance to know if the raw block is transposed.
+	 * @return Bitmap containing the Information of the column.
 	 */
-	public static char[] genRLEBitmap(int[] offsets, int len) {
-		if(len == 0)
-			return new char[0]; // empty list
-
-		// Use an ArrayList for correctness at the expense of temp space
-		ArrayList<Character> buf = new ArrayList<>();
-
-		// 1 + (position of last 1 in the previous run of 1's)
-		// We add 1 because runs may be of length zero.
-		int lastRunEnd = 0;
-
-		// Offset between the end of the previous run of 1's and the first 1 in
-		// the current run. Initialized below.
-		int curRunOff;
-
-		// Length of the most recent run of 1's
-		int curRunLen = 0;
-
-		// Current encoding is as follows:
-		// Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
-		// Positive entry: Entry encodes offset to next run of 1's. The next
-		// entry in the bitmap holds a run length.
-
-		// Special-case the first run to simplify the loop below.
-		int firstOff = offsets[0];
-
-		// The first run may start more than a short's worth of bits in
-		while(firstOff > Character.MAX_VALUE) {
-			buf.add(Character.MAX_VALUE);
-			buf.add((char) 0);
-			firstOff -= Character.MAX_VALUE;
-			lastRunEnd += Character.MAX_VALUE;
-		}
-
-		// Create the first run with an initial size of 1
-		curRunOff = firstOff;
-		curRunLen = 1;
-
-		// Process the remaining offsets
-		for(int i = 1; i < len; i++) {
-
-			int absOffset = offsets[i];
-
-			// 1 + (last position in run)
-			int curRunEnd = lastRunEnd + curRunOff + curRunLen;
-
-			if(absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
-				// End of a run, either because we hit a run of 0's or because the
-				// number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
-				buf.add((char) curRunOff);
-				buf.add((char) curRunLen);
-
-				lastRunEnd = curRunEnd;
-				curRunOff = absOffset - lastRunEnd;
-
-				while(curRunOff > Character.MAX_VALUE) {
-					// SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
-					// Add zero-length runs until the offset is small enough.
-					buf.add(Character.MAX_VALUE);
-					buf.add((char) 0);
-					lastRunEnd += Character.MAX_VALUE;
-					curRunOff -= Character.MAX_VALUE;
-				}
-
-				curRunLen = 1;
-			}
-			else {
-				// Middle of a run
-				curRunLen++;
-			}
-		}
-
-		if(curRunLen >= 1) {
-			// Edge case, if the last run overlaps the character length bound.
-			if(curRunOff + curRunLen > Character.MAX_VALUE) {
-				buf.add(Character.MAX_VALUE);
-				buf.add((char) 0);
-				curRunOff -= Character.MAX_VALUE;
-			}
-
-			buf.add((char) curRunOff);
-			buf.add((char) curRunLen);
-		}
-
-		// Convert wasteful ArrayList to packed array.
-		char[] ret = new char[buf.size()];
-		for(int i = 0; i < buf.size(); i++)
-			ret[i] = buf.get(i);
-		return ret;
-	}
-
-	/**
-	 * Encodes the bitmap in blocks of offsets. Within each block, the bits are stored as absolute offsets from the
-	 * start of the block.
-	 * 
-	 * @param offsets uncompressed offset list
-	 * @param len     logical length of the given offset list
-	 * 
-	 * @return compressed version of said bitmap
-	 */
-	public static char[] genOffsetBitmap(int[] offsets, int len) {
-		int lastOffset = offsets[len - 1];
-
-		// Build up the blocks
-		int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1;
-		// To simplify the logic, we make two passes.
-		// The first pass divides the offsets by block.
-		int[] blockLengths = new int[numBlocks];
-
-		for(int ix = 0; ix < len; ix++) {
-			int val = offsets[ix];
-			int blockForVal = val / BITMAP_BLOCK_SZ;
-			blockLengths[blockForVal]++;
-		}
-
-		// The second pass creates the blocks.
-		int totalSize = numBlocks;
-		for(int block = 0; block < numBlocks; block++) {
-			totalSize += blockLengths[block];
-		}
-		char[] encodedBlocks = new char[totalSize];
-
-		int inputIx = 0;
-		int blockStartIx = 0;
-		for(int block = 0; block < numBlocks; block++) {
-			int blockSz = blockLengths[block];
-
-			// First entry in the block is number of bits
-			encodedBlocks[blockStartIx] = (char) blockSz;
-
-			for(int i = 0; i < blockSz; i++) {
-				encodedBlocks[blockStartIx + i + 1] = (char) (offsets[inputIx + i] % BITMAP_BLOCK_SZ);
-			}
-
-			inputIx += blockSz;
-			blockStartIx += blockSz + 1;
-		}
-
-		return encodedBlocks;
-	}
-
-	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawBlock, boolean skipZeros,
-		CompressionSettings compSettings) {
+	private static Bitmap extractBitmap(int colIndex, MatrixBlock rawBlock, CompressionSettings compSettings) {
 		// probe map for distinct items (for value or value groups)
 		DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
 
 		// scan rows and probe/build distinct items
 		final int m = compSettings.transposeInput ? rawBlock.getNumColumns() : rawBlock.getNumRows();
+		int numZeros = 0;
 
-		if(rawBlock.isInSparseFormat() // SPARSE
-			&& compSettings.transposeInput) {
+		if(rawBlock.isInSparseFormat() && compSettings.transposeInput) { // SPARSE and Transposed.
 			SparseBlock a = rawBlock.getSparseBlock();
 			if(a != null && !a.isEmpty(colIndex)) {
 				int apos = a.pos(colIndex);
 				int alen = a.size(colIndex);
+				numZeros = m - alen;
 				int[] aix = a.indexes(colIndex);
 				double[] avals = a.values(colIndex);
 
-				IntArrayList lstPtr0 = new IntArrayList(); // for 0 values
-				int last = -1;
+				// IntArrayList lstPtr0 = new IntArrayList(); // for 0 values
+				// int last = -1;
 				// iterate over non-zero entries but fill in zeros
 				for(int j = apos; j < apos + alen; j++) {
 					// fill in zero values
-					if(!skipZeros)
-						for(int k = last + 1; k < aix[j]; k++)
-							lstPtr0.appendValue(k);
+					// if(!skipZeros)
+					// for(int k = last + 1; k < aix[j]; k++)
+					// lstPtr0.appendValue(k);
 					// handle non-zero value
 					IntArrayList lstPtr = distinctVals.get(avals[j]);
 					if(lstPtr == null) {
@@ -266,29 +109,29 @@
 						distinctVals.appendValue(avals[j], lstPtr);
 					}
 					lstPtr.appendValue(aix[j]);
-					last = aix[j];
+					// last = aix[j];
 				}
 				// fill in remaining zero values
-				if(!skipZeros) {
-					for(int k = last + 1; k < m; k++)
-						lstPtr0.appendValue(k);
-					if(lstPtr0.size() > 0)
-						distinctVals.appendValue(0, lstPtr0);
-				}
+				// if(!skipZeros) {
+				// for(int k = last + 1; k < m; k++)
+				// lstPtr0.appendValue(k);
+				// if(lstPtr0.size() > 0)
+				// distinctVals.appendValue(0, lstPtr0);
+				// }
 			}
-			else if(!skipZeros) { // full 0 column
-				IntArrayList lstPtr = new IntArrayList();
-				for(int i = 0; i < m; i++)
-					lstPtr.appendValue(i);
-				distinctVals.appendValue(0, lstPtr);
-			}
+			// else if(!skipZeros) { // full 0 column
+			// IntArrayList lstPtr = new IntArrayList();
+			// for(int i = 0; i < m; i++)
+			// lstPtr.appendValue(i);
+			// distinctVals.appendValue(0, lstPtr);
+			// }
 		}
 		else // GENERAL CASE
 		{
 			for(int i = 0; i < m; i++) {
 				double val = compSettings.transposeInput ? rawBlock.quickGetValue(colIndex, i) : rawBlock
 					.quickGetValue(i, colIndex);
-				if(val != 0 || !skipZeros) {
+				if(val != 0) {
 					IntArrayList lstPtr = distinctVals.get(val);
 					if(lstPtr == null) {
 						lstPtr = new IntArrayList();
@@ -296,45 +139,23 @@
 					}
 					lstPtr.appendValue(i);
 				}
-			}
-		}
-
-		return new UncompressedBitmap(distinctVals);
-	}
-
-	private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawBlock, int[] sampleIndexes,
-		boolean skipZeros, CompressionSettings compSettings) {
-		// note: general case only because anyway binary search for small samples
-
-		// probe map for distinct items (for value or value groups)
-		DoubleIntListHashMap distinctVals = new DoubleIntListHashMap();
-
-		// scan rows and probe/build distinct items
-		final int m = sampleIndexes.length;
-		for(int i = 0; i < m; i++) {
-			int rowIndex = sampleIndexes[i];
-			double val = compSettings.transposeInput ? rawBlock.quickGetValue(colIndex, rowIndex) : rawBlock
-				.quickGetValue(rowIndex, colIndex);
-			if(val != 0 || !skipZeros) {
-				IntArrayList lstPtr = distinctVals.get(val);
-				if(lstPtr == null) {
-					lstPtr = new IntArrayList();
-					distinctVals.appendValue(val, lstPtr);
+				else {
+					numZeros++;
 				}
-				lstPtr.appendValue(i);
 			}
 		}
 
-		return new UncompressedBitmap(distinctVals);
+		return Bitmap.makeBitmap(distinctVals, numZeros);
 	}
 
-	private static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawBlock,
-		ReaderColumnSelection rowReader) {
+	private static Bitmap extractBitmap(int[] colIndices, MatrixBlock rawBlock, ReaderColumnSelection rowReader) {
 		// probe map for distinct items (for value or value groups)
 		DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap();
 
 		// scan rows and probe/build distinct items
 		DblArray cellVals = null;
+
+		int zero = 0;
 		while((cellVals = rowReader.nextRow()) != null) {
 			IntArrayList lstPtr = distinctVals.get(cellVals);
 			if(lstPtr == null) {
@@ -342,9 +163,12 @@
 				lstPtr = new IntArrayList();
 				distinctVals.appendValue(new DblArray(cellVals), lstPtr);
 			}
+			zero += DblArray.isZero(cellVals) ? 1 : 0;
+
 			lstPtr.appendValue(rowReader.getCurrentRowIndex());
 		}
 
-		return new UncompressedBitmap(distinctVals, colIndices.length);
+		return Bitmap.makeBitmap(distinctVals, colIndices.length, zero);
 	}
+
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
index 22a810c..2913e99 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
@@ -33,6 +33,7 @@
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 
+import org.apache.commons.lang.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.lops.MMTSJ.MMTSJType;
@@ -41,10 +42,8 @@
 import org.apache.sysds.runtime.compress.colgroup.ColGroup;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupConverter;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC1;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupIO;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupOffset;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupValue;
 import org.apache.sysds.runtime.compress.colgroup.DenseRowIterator;
@@ -80,13 +79,16 @@
 	private static final Log LOG = LogFactory.getLog(CompressedMatrixBlock.class.getName());
 	private static final long serialVersionUID = 7319372019143154058L;
 
-	// Threshold for when to parallelize the aggregation functions.
-	private static final long MIN_PAR_AGG_THRESHOLD = 16 * 1024 * 1024; // 16MB
-	protected CompressionStatistics _stats = null;
+	/** Threshold for when to parallelize the aggregation functions. */
+	private static final long MIN_PAR_AGG_THRESHOLD = 8 * 1024 * 1024; // 8MB
+
+	protected boolean _lossy;
 	protected boolean _sharedDDC1Dict = false;
 
 	/**
 	 * Constructor for building an empty Compressed Matrix block object.
+	 * 
+	 * OBS! Only to be used for serialization.
 	 */
 	public CompressedMatrixBlock() {
 		super();
@@ -99,7 +101,7 @@
 	 * @param cl     number of columns
 	 * @param sparse true if the UNCOMPRESSED representation of the block should be sparse
 	 */
-	public CompressedMatrixBlock(int rl, int cl, boolean sparse) {
+	protected CompressedMatrixBlock(int rl, int cl, boolean sparse) {
 		super(rl, cl, sparse);
 	}
 
@@ -109,7 +111,7 @@
 	 * 
 	 * @param that matrix block
 	 */
-	public CompressedMatrixBlock(MatrixBlock that) {
+	protected CompressedMatrixBlock(MatrixBlock that) {
 		super(that.getNumRows(), that.getNumColumns(), that.isInSparseFormat());
 
 		// shallow copy (deep copy on compression, prevents unnecessary copy)
@@ -120,13 +122,9 @@
 		nonZeros = that.getNonZeros();
 	}
 
-	public CompressionStatistics getCompressionStatistics() {
-		return _stats;
-	}
-
-	public boolean isCompressed() {
-		return(_colGroups != null);
-	}
+	// public CompressionStatistics getCompressionStatistics() {
+	// return _stats;
+	// }
 
 	public boolean isSingleUncompressedGroup() {
 		return(_colGroups != null && _colGroups.size() == 1 &&
@@ -147,9 +145,6 @@
 	 * @return a new uncompressed matrix block containing the contents of this block
 	 */
 	public MatrixBlock decompress() {
-		// early abort for not yet compressed blocks
-		if(!isCompressed())
-			return new MatrixBlock(this);
 
 		Timing time = new Timing(true);
 
@@ -187,13 +182,11 @@
 	 * @return a new uncompressed matrix block containing the contents of this block
 	 */
 	public MatrixBlock decompress(int k) {
-		// early abort for not yet compressed blocks
-		if(!isCompressed())
-			return new MatrixBlock(this);
+
 		if(k <= 1)
 			return decompress();
 
-		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
+		Timing time = new Timing(true);
 
 		MatrixBlock ret = new MatrixBlock(rlen, clen, sparse, nonZeros).allocateBlock();
 
@@ -201,7 +194,7 @@
 		try {
 			ExecutorService pool = CommonThreadPool.get(k);
 			int rlen = getNumRows();
-			int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / k)));
+			int blklen = getAlignedBlockSize((int) (Math.ceil((double) rlen / k)));
 			ArrayList<DecompressTask> tasks = new ArrayList<>();
 			for(int i = 0; i < k & i * blklen < getNumRows(); i++)
 				tasks.add(new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, rlen)));
@@ -210,15 +203,15 @@
 			for(Future<Object> rt : rtasks)
 				rt.get(); // error handling
 		}
-		catch(Exception ex) {
-			throw new DMLRuntimeException(ex);
+		catch(InterruptedException | ExecutionException ex) {
+			LOG.error("Parallel decompression failed defaulting to non parallel implementation " + ex.getMessage());
+			return decompress();
 		}
 
 		// post-processing
 		ret.setNonZeros(nonZeros);
 
-		if(LOG.isDebugEnabled())
-			LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms.");
+		LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms.");
 
 		return ret;
 	}
@@ -229,12 +222,8 @@
 	 * @return an upper bound on the memory used to store this compressed block considering class overhead.
 	 */
 	public long estimateCompressedSizeInMemory() {
-		if(!isCompressed())
-			return Long.MAX_VALUE;
-
 		long total = baseSizeInMemory();
 
-		// TODO scale up based on number of col groups.
 		for(ColGroup grp : _colGroups)
 			total += grp.estimateInMemorySize();
 
@@ -258,11 +247,7 @@
 
 		total += 40; // Matrix Block elements
 		total += 8; // Col Group Ref
-		total += 1 + 7; // Booleans plus padding
-
-		// TODO: Reduce memory usage from CompressionStatistics
-		total += 8; // Stats object reference
-		total += CompressionStatistics.getSizeInMemory();
+		total += 2 + 6; // Booleans plus padding
 
 		total += 40; // Col Group Array List
 		return total;
@@ -270,17 +255,17 @@
 
 	@Override
 	public double quickGetValue(int r, int c) {
-		if(!isCompressed()) {
-			return super.quickGetValue(r, c);
-		}
 
-		// find column group according to col index
+		// TODO Optimize Quick Get Value, to located the correct column group without having to search for it
+
 		ColGroup grp = null;
-		for(ColGroup group : _colGroups)
+		for(ColGroup group : _colGroups) {
+
 			if(Arrays.binarySearch(group.getColIndices(), c) >= 0) {
 				grp = group;
 				break;
 			}
+		}
 
 		// find row value
 		return grp.get(r, c);
@@ -295,65 +280,38 @@
 		long ret = 22;
 		for(ColGroup grp : _colGroups) {
 			ret += 1; // type info
+			// TODO: Handle shared dictionary
+
 			ret += grp.getExactSizeOnDisk();
 		}
 		return ret;
 	}
 
 	@Override
-	public boolean isShallowSerialize() {
-		return false;
-	}
-
-	@Override
-	public boolean isShallowSerialize(boolean inclConvert) {
-		return false;
-	}
-
-	@Override
-	public void toShallowSerializeBlock() {
-		// do nothing
-	}
-
-	@Override
 	public void readFields(DataInput in) throws IOException {
-		boolean compressed = in.readBoolean();
-
-		// deserialize uncompressed block
-		if(!compressed) {
-			super.readFields(in);
-			return;
-		}
 		// deserialize compressed block
 		rlen = in.readInt();
 		clen = in.readInt();
 		nonZeros = in.readLong();
 		_sharedDDC1Dict = in.readBoolean();
+		_lossy = in.readBoolean();
 		_colGroups = ColGroupIO.readGroups(in, _sharedDDC1Dict);
 	}
 
 	@Override
 	public void write(DataOutput out) throws IOException {
-		out.writeBoolean(isCompressed());
-
-		// serialize uncompressed block
-		if(!isCompressed()) {
-			super.write(out);
-			return;
-		}
-
 		// serialize compressed matrix block
 		out.writeInt(rlen);
 		out.writeInt(clen);
 		out.writeLong(nonZeros);
 		out.writeBoolean(_sharedDDC1Dict);
-
+		out.writeBoolean(_lossy);
 		ColGroupIO.writeGroups(out, _sharedDDC1Dict, _colGroups);
 	}
 
 	/**
 	 * Redirects the default java serialization via externalizable to our default hadoop writable serialization for
-	 * efficient broadcast/rdd deserialization.
+	 * efficient broadcast/rdd de-serialization.
 	 * 
 	 * @param is object input
 	 * @throws IOException if IOException occurs
@@ -403,10 +361,6 @@
 
 	@Override
 	public MatrixBlock scalarOperations(ScalarOperator sop, MatrixValue result) {
-		// call uncompressed matrix scalar if necessary
-		if(!isCompressed()) {
-			return super.scalarOperations(sop, result);
-		}
 
 		// allocate the output matrix block
 		CompressedMatrixBlock ret = null;
@@ -431,12 +385,6 @@
 
 	@Override
 	public MatrixBlock append(MatrixBlock that, MatrixBlock ret) {
-		// call uncompressed matrix append if necessary
-		if(!isCompressed()) {
-			if(that instanceof CompressedMatrixBlock)
-				that = ((CompressedMatrixBlock) that).decompress();
-			return super.append(that, ret, true);
-		}
 
 		final int m = rlen;
 		final int n = clen + that.getNumColumns();
@@ -458,7 +406,7 @@
 
 		// copy of rhs column groups w/ col index shifting
 		if(!(that instanceof CompressedMatrixBlock)) {
-			that = CompressedMatrixBlockFactory.compress(that);
+			that = CompressedMatrixBlockFactory.compress(that).getLeft();
 		}
 
 		List<ColGroup> inColGroups = ((CompressedMatrixBlock) that)._colGroups;
@@ -475,10 +423,6 @@
 
 	@Override
 	public MatrixBlock chainMatrixMultOperations(MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype) {
-		// call uncompressed matrix mult if necessary
-		if(!isCompressed()) {
-			return super.chainMatrixMultOperations(v, w, out, ctype);
-		}
 
 		if(this.getNumColumns() != v.getNumRows())
 			throw new DMLRuntimeException(
@@ -522,10 +466,6 @@
 	@Override
 	public MatrixBlock chainMatrixMultOperations(MatrixBlock v, MatrixBlock w, MatrixBlock out, ChainType ctype,
 		int k) {
-		// call uncompressed matrix mult if necessary
-		if(!isCompressed()) {
-			return super.chainMatrixMultOperations(v, w, out, ctype, k);
-		}
 
 		if(this.getNumColumns() != v.getNumRows())
 			throw new DMLRuntimeException(
@@ -572,10 +512,6 @@
 	@Override
 	public MatrixBlock aggregateBinaryOperations(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret,
 		AggregateBinaryOperator op) {
-		// call uncompressed matrix mult if necessary
-		if(!isCompressed()) {
-			return super.aggregateBinaryOperations(m1, m2, ret, op);
-		}
 
 		// Should not happen that it is a single uncompressed group.
 		// multi-threaded MM of single uncompressed ColGroup
@@ -614,10 +550,7 @@
 				// prepare the other input (including decompression if necessary)
 			boolean right = (m1 == this);
 			MatrixBlock that = right ? m2 : m1;
-			if(that instanceof CompressedMatrixBlock) {
-				that = ((CompressedMatrixBlock) that).isCompressed() ? ((CompressedMatrixBlock) that)
-					.decompress() : that;
-			}
+			that = that instanceof CompressedMatrixBlock ? ((CompressedMatrixBlock) that).decompress() : that;
 
 			// transpose for sequential repeated column access
 			if(right) {
@@ -662,17 +595,13 @@
 	@Override
 	public MatrixBlock aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blen,
 		MatrixIndexes indexesIn, boolean inCP) {
-		// call uncompressed matrix mult if necessary
-		if(!isCompressed()) {
-			return super.aggregateUnaryOperations(op, result, blen, indexesIn, inCP);
-		}
 
 		// check for supported operations
 		if(!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq ||
 			(op.aggOp.increOp.fn instanceof Builtin &&
 				(((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN ||
 					((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
-			throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
+			throw new NotImplementedException("Unary aggregate " + op.aggOp.increOp.fn + " not supported yet.");
 		}
 
 		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
@@ -730,8 +659,7 @@
 				ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
 				ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
 				if(op.indexFn instanceof ReduceCol && grpParts.length > 0) {
-					int blklen = BitmapEncoder
-						.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
+					int blklen = getAlignedBlockSize((int) (Math.ceil((double) rlen / op.getNumThreads())));
 					for(int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++)
 						tasks.add(
 							new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
@@ -810,21 +738,21 @@
 		int rl, int ru) {
 
 		// Seems misplaced logic for when to use CacheDDC
-		boolean cacheDDC1 = op.indexFn instanceof ReduceCol &&
-			op.aggOp.increOp.fn instanceof KahanPlus // rowSums
-			&& ColGroupOffset.ALLOW_CACHE_CONSCIOUS_ROWSUMS && ru - rl > ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
+		boolean cacheDDC1 = false;
+		// op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof KahanPlus // rowSums
+		// && ColGroupOffset.ALLOW_CACHE_CONSCIOUS_ROWSUMS && ru - rl > CompressionSettings.BITMAP_BLOCK_SZ;
 
 		// process cache-conscious DDC1 groups (adds to output)
 		// TODO: Fix such that is is able to sharing even if ColGroupDDC2
-		if(cacheDDC1) {
-			ArrayList<ColGroupDDC1> tmp = new ArrayList<>();
-			for(ColGroup grp : groups)
-				if(grp instanceof ColGroupDDC1)
-					tmp.add((ColGroupDDC1) grp);
-			if(!tmp.isEmpty())
-				ColGroupDDC1
-					.computeRowSums(tmp.toArray(new ColGroupDDC1[0]), ret, KahanPlus.getKahanPlusFnObject(), rl, ru);
-		}
+		// if(cacheDDC1) {
+		// ArrayList<ColGroupDDC1> tmp = new ArrayList<>();
+		// for(ColGroup grp : groups)
+		// if(grp instanceof ColGroupDDC1)
+		// tmp.add((ColGroupDDC1) grp);
+		// if(!tmp.isEmpty())
+		// ColGroupDDC1
+		// .computeRowSums(tmp.toArray(new ColGroupDDC1[0]), ret, KahanPlus.getKahanPlusFnObject(), rl, ru);
+		// }
 
 		// process remaining groups (adds to output)
 		// note: UC group never passed into this function
@@ -835,15 +763,6 @@
 
 	@Override
 	public MatrixBlock transposeSelfMatrixMultOperations(MatrixBlock out, MMTSJType tstype) {
-		// call uncompressed matrix mult if necessary
-		if(!isCompressed()) {
-			return super.transposeSelfMatrixMultOperations(out, tstype);
-		}
-
-		// single-threaded transpose self MM of single uncompressed ColGroup
-		if(isSingleUncompressedGroup()) {
-			return ((ColGroupUncompressed) _colGroups.get(0)).getData().transposeSelfMatrixMultOperations(out, tstype);
-		}
 
 		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
 
@@ -874,15 +793,9 @@
 
 	@Override
 	public MatrixBlock transposeSelfMatrixMultOperations(MatrixBlock out, MMTSJType tstype, int k) {
-		// call uncompressed matrix mult if necessary
-		if(!isCompressed()) {
-			return super.transposeSelfMatrixMultOperations(out, tstype, k);
-		}
 
-		// multi-threaded transpose self MM of single uncompressed ColGroup
-		if(isSingleUncompressedGroup()) {
-			return ((ColGroupUncompressed) _colGroups.get(0)).getData()
-				.transposeSelfMatrixMultOperations(out, tstype, k);
+		if(k <= 1) {
+			return transposeSelfMatrixMultOperations(out, tstype);
 		}
 
 		Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
@@ -939,7 +852,7 @@
 		result.allocateDenseBlock();
 
 		// delegate matrix-vector operation to each column group
-		rightMultByVector(_colGroups, vector, result, true, 0, result.getNumRows());
+		rightMultByVector(_colGroups, vector, result, 0, result.getNumRows());
 
 		// post-processing
 		result.recomputeNonZeros();
@@ -967,7 +880,7 @@
 			// compute remaining compressed column groups in parallel
 			ExecutorService pool = CommonThreadPool.get(k);
 			int rlen = getNumRows();
-			int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / k)));
+			int blklen = getAlignedBlockSize((int) (Math.ceil((double) rlen / k)));
 			ArrayList<RightMatrixMultTask> tasks = new ArrayList<>();
 			for(int i = 0; i < k & i * blklen < getNumRows(); i++)
 				tasks.add(
@@ -986,20 +899,20 @@
 		}
 	}
 
-	private static void rightMultByVector(List<ColGroup> groups, MatrixBlock vect, MatrixBlock ret, boolean inclUC,
-		int rl, int ru) {
+	private static void rightMultByVector(List<ColGroup> groups, MatrixBlock vect, MatrixBlock ret, int rl, int ru) {
 		ColGroupValue.setupThreadLocalMemory(getMaxNumValues(groups));
 
-		boolean cacheDDC1 = ru - rl > ColGroupOffset.WRITE_CACHE_BLKSZ;
+		boolean cacheDDC1 = ru - rl > CompressionSettings.BITMAP_BLOCK_SZ * 2;
 
 		// process uncompressed column group (overwrites output)
-		if(inclUC) {
-			for(ColGroup grp : groups)
-				if(grp instanceof ColGroupUncompressed)
-					grp.rightMultByVector(vect, ret, rl, ru);
-		}
+		// if(inclUC) {
+		// for(ColGroup grp : groups)
+		// if(grp instanceof ColGroupUncompressed)
+		// grp.rightMultByVector(vect, ret, rl, ru);
+		// }
 
 		// process cache-conscious DDC1 groups (adds to output)
+
 		if(cacheDDC1) {
 			ArrayList<ColGroupDDC1> tmp = new ArrayList<>();
 			for(ColGroup grp : groups)
@@ -1008,13 +921,18 @@
 			if(!tmp.isEmpty())
 				ColGroupDDC1.rightMultByVector(tmp.toArray(new ColGroupDDC1[0]), vect, ret, rl, ru);
 		}
-
 		// process remaining groups (adds to output)
-		for(ColGroup grp : groups)
-			if(!(grp instanceof ColGroupUncompressed) && !(cacheDDC1 && grp instanceof ColGroupDDC1))
+
+		for(ColGroup grp : groups) {
+			if(!(cacheDDC1 && grp instanceof ColGroupDDC1)) {
+
 				grp.rightMultByVector(vect, ret, rl, ru);
 
+			}
+		}
+
 		ColGroupValue.cleanupThreadLocalMemory();
+
 	}
 
 	/**
@@ -1053,15 +971,15 @@
 		result.recomputeNonZeros();
 	}
 
-	private static void leftMultByVectorTranspose(List<ColGroup> colGroups, ColGroupDDC vector, MatrixBlock result) {
-		// initialize and allocate the result
-		result.reset();
-		// delegate matrix-vector operation to each column group
-		for(ColGroup grp : colGroups)
-			grp.leftMultByRowVector(vector, result);
-		// post-processing
-		result.recomputeNonZeros();
-	}
+	// private static void leftMultByVectorTranspose(List<ColGroup> colGroups, ColGroupDDC vector, MatrixBlock result) {
+	// // initialize and allocate the result
+	// result.reset();
+	// // delegate matrix-vector operation to each column group
+	// for(ColGroup grp : colGroups)
+	// grp.leftMultByRowVector(vector, result);
+	// // post-processing
+	// result.recomputeNonZeros();
+	// }
 
 	/**
 	 * Multi-thread version of leftMultByVectorTranspose.
@@ -1114,7 +1032,7 @@
 	private static void leftMultByTransposeSelf(List<ColGroup> groups, MatrixBlock result, int gl, int gu) {
 		final int numRows = groups.get(0).getNumRows();
 		final int numGroups = groups.size();
-		final boolean containsUC = containsUncompressedColGroup(groups);
+		// final boolean containsUC = containsUncompressedColGroup(groups);
 
 		// preallocated dense tmp matrix blocks
 		MatrixBlock lhs = new MatrixBlock(1, numRows, false);
@@ -1133,28 +1051,28 @@
 			int[] ixgroup = group.getColIndices();
 			List<ColGroup> tmpList = groups.subList(i, numGroups);
 
-			if(group instanceof ColGroupDDC // single DDC group
-				&& ixgroup.length == 1 && !containsUC && numRows < BitmapEncoder.BITMAP_BLOCK_SZ) {
-				// compute vector-matrix partial result
-				leftMultByVectorTranspose(tmpList, (ColGroupDDC) group, tmpret);
+			// if(group instanceof ColGroupDDC // single DDC group
+			// && ixgroup.length == 1 && !containsUC && numRows < CompressionSettings.BITMAP_BLOCK_SZ) {
+			// // compute vector-matrix partial result
+			// leftMultByVectorTranspose(tmpList, (ColGroupDDC) group, tmpret);
 
-				// write partial results (disjoint non-zeros)
-				LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, ixgroup[0]);
-			}
-			else {
-				// for all uncompressed lhs columns vectors
-				for(int j = 0; j < ixgroup.length; j++) {
-					group.decompressToBlock(lhs, j);
+			// // write partial results (disjoint non-zeros)
+			// LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, ixgroup[0]);
+			// }
+			// else {
+			// for all uncompressed lhs columns vectors
+			for(int j = 0; j < ixgroup.length; j++) {
+				group.decompressToBlock(lhs, j);
 
-					if(!lhs.isEmptyBlock(false)) {
-						// compute vector-matrix partial result
-						leftMultByVectorTranspose(tmpList, lhs, tmpret, false, false);
+				if(!lhs.isEmptyBlock(false)) {
+					// compute vector-matrix partial result
+					leftMultByVectorTranspose(tmpList, lhs, tmpret, false, false);
 
-						// write partial results (disjoint non-zeros)
-						LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, ixgroup[j]);
-					}
+					// write partial results (disjoint non-zeros)
+					LinearAlgebraUtils.copyNonZerosToUpperTriangle(result, tmpret, ixgroup[j]);
 				}
 			}
+			// }
 		}
 
 		// post processing
@@ -1205,12 +1123,12 @@
 		return null;
 	}
 
-	private static boolean containsUncompressedColGroup(List<ColGroup> groups) {
-		for(ColGroup grp : groups)
-			if(grp instanceof ColGroupUncompressed)
-				return true;
-		return false;
-	}
+	// private static boolean containsUncompressedColGroup(List<ColGroup> groups) {
+	// 	for(ColGroup grp : groups)
+	// 		if(grp instanceof ColGroupUncompressed)
+	// 			return true;
+	// 	return false;
+	// }
 
 	private static class LeftMatrixMultTask implements Callable<Object> {
 		private final ArrayList<ColGroup> _groups;
@@ -1254,7 +1172,7 @@
 
 		@Override
 		public Long call() {
-			rightMultByVector(_groups, _vect, _ret, false, _rl, _ru);
+			rightMultByVector(_groups, _vect, _ret, _rl, _ru);
 			return _ret.recomputeNonZeros(_rl, _ru - 1, 0, 0);
 		}
 	}
@@ -1353,4 +1271,15 @@
 		}
 	}
 
+	/**
+	 * Calculates the Aligned block size if the block is a certain length.
+	 * 
+	 * @param blklen The Entered block length
+	 * @return The total size of aligned blocks rounded the entered value up to the next BITMAP_BLOCK_SZ
+	 */
+	private static int getAlignedBlockSize(int blklen) {
+		return blklen + ((blklen % CompressionSettings.BITMAP_BLOCK_SZ != 0) ? CompressionSettings.BITMAP_BLOCK_SZ -
+			blklen % CompressionSettings.BITMAP_BLOCK_SZ : 0);
+	}
+
 }
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
index 77e94bc..0cbd8af 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
@@ -24,6 +24,8 @@
 import java.util.List;
 import java.util.Map.Entry;
 
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.DMLRuntimeException;
@@ -49,16 +51,16 @@
 	private static final Log LOG = LogFactory.getLog(CompressedMatrixBlockFactory.class.getName());
 	private static final CompressionSettings defaultCompressionSettings = new CompressionSettingsBuilder().create();
 
-	public static MatrixBlock compress(MatrixBlock mb) {
+	public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb) {
 		// Default sequential execution of compression
 		return compress(mb, 1, defaultCompressionSettings);
 	}
 
-	public static MatrixBlock compress(MatrixBlock mb, CompressionSettings customSettings) {
+	public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb, CompressionSettings customSettings) {
 		return compress(mb, 1, customSettings);
 	}
 
-	public static MatrixBlock compress(MatrixBlock mb, int k) {
+	public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb, int k) {
 		return compress(mb, k, defaultCompressionSettings);
 	}
 
@@ -77,9 +79,9 @@
 	 * @param compSettings The Compression settings used
 	 * @return A compressed matrix block.
 	 */
-	public static MatrixBlock compress(MatrixBlock mb, int k, CompressionSettings compSettings) {
+	public static Pair<MatrixBlock, CompressionStatistics> compress(MatrixBlock mb, int k, CompressionSettings compSettings) {
 		// Check for redundant compression
-		if(mb instanceof CompressedMatrixBlock && ((CompressedMatrixBlock) mb).isCompressed()) {
+		if(mb instanceof CompressedMatrixBlock) {
 			throw new DMLRuntimeException("Redundant compression, block already compressed.");
 		}
 
@@ -117,14 +119,14 @@
 
 		if(sizeInfos.colsC.isEmpty()) {
 			LOG.warn("Abort block compression because all columns are incompressible.");
-			return new MatrixBlock().copyShallow(mb);
+			return new ImmutablePair<>(new MatrixBlock().copyShallow(mb), _stats);
 		}
 		// --------------------------------------------------
 
 		// --------------------------------------------------
 		// PHASE 2: Grouping columns
 		// Divide the columns into column groups.
-		List<int[]> coCodeColGroups = PlanningCoCoder.findCocodesByPartitioning(sizeEstimator, sizeInfos, numRows, k);
+		List<int[]> coCodeColGroups = PlanningCoCoder.findCoCodesByPartitioning(sizeEstimator, sizeInfos, numRows, k, compSettings);
 		_stats.setNextTimePhase(time.stop());
 		LOG.debug("--compression phase 2: " + _stats.getLastTimePhase());
 
@@ -174,7 +176,7 @@
 
 		if(_stats.ratio < 1) {
 			LOG.warn("Abort block compression because compression ratio is less than 1.");
-			return new MatrixBlock().copyShallow(mb);
+			return new ImmutablePair<>(new MatrixBlock().copyShallow(mb), _stats);
 		}
 
 		// Final cleanup (discard uncompressed block)
@@ -191,15 +193,12 @@
 		LOG.debug("--compressed size: " + _stats.size);
 		LOG.debug("--compression ratio: " + _stats.ratio);
 
-		// Set the statistics object.
-		// For better compression ratios this could be removed, since it is around 64 Bytes.
-		res._stats = _stats;
+		res._lossy = compSettings.lossy;
 
-		return res;
+		return new ImmutablePair<>(res, _stats);
 		// --------------------------------------------------
 	}
 
-
 	/**
 	 * Dictionary sharing between DDC ColGroups.
 	 * 
@@ -218,7 +217,7 @@
 				final double[] values = grpDDC1.getValues();
 				double min = Double.POSITIVE_INFINITY;
 				double max = Double.NEGATIVE_INFINITY;
-				for(int i=0; i<values.length; i++) {
+				for(int i = 0; i < values.length; i++) {
 					vals.add(values[i]);
 					min = Math.min(min, values[i]);
 					max = Math.max(max, values[i]);
@@ -236,13 +235,13 @@
 		// build consolidated shared dictionary
 		double[] values = vals.stream().mapToDouble(Double::doubleValue).toArray();
 		int[] colIndexes = new int[numDDC1];
-		double[] extrema = new double[2*numDDC1];
+		double[] extrema = new double[2 * numDDC1];
 		int pos = 0;
-		for( Entry<Integer, Double> e : mins.entrySet() ) {
+		for(Entry<Integer, Double> e : mins.entrySet()) {
 			colIndexes[pos] = e.getKey();
-			extrema[2*pos] = e.getValue();
-			extrema[2*pos+1] = maxs.get(e.getKey());
-			pos ++;
+			extrema[2 * pos] = e.getValue();
+			extrema[2 * pos + 1] = maxs.get(e.getKey());
+			pos++;
 		}
 		return new DictionaryShared(values, colIndexes, extrema);
 	}
@@ -253,7 +252,7 @@
 		double[] values = dict.getValues();
 		for(int i = 0; i < values.length; i++)
 			map.put(values[i], i);
-		
+
 		// recode data of all relevant DDC1 groups
 		for(ColGroup grp : colGroups)
 			if(grp.getNumCols() == 1 && grp instanceof ColGroupDDC1) {
diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java
index 3deb168..0e0a017 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java
@@ -19,57 +19,80 @@
 
 package org.apache.sysds.runtime.compress;
 
-import java.util.List;
+import java.util.Set;
 
+import org.apache.sysds.runtime.compress.cocode.PlanningCoCoder.PartitionerType;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
 
 /**
- * Compression Settings class, used as a bundle of parameters inside the Compression framework.
- * See CompressionSettingsBuilder for default non static parameters.
+ * Compression Settings class, used as a bundle of parameters inside the Compression framework. See
+ * CompressionSettingsBuilder for default non static parameters.
  */
 public class CompressionSettings {
 
-	// Sorting of values by physical length helps by 10-20%, especially for serial, while
-	// slight performance decrease for parallel incl multi-threaded, hence not applied for
-	// distributed operations (also because compression time + garbage collection increases)
-	public static final boolean SORT_VALUES_BY_LENGTH = true;
+	/** Size of the blocks used in a blocked bitmap representation. Note it is one more than Character.MAX_VALUE. */
+	public static final int BITMAP_BLOCK_SZ = 65536;
 
-	// The sampling ratio used when choosing ColGroups.
-	// Note that, default behavior is to use exact estimator if the number of elements is below 1000.
+	/**
+	 * Sorting of values by physical length helps by 10-20%, especially for serial, while slight performance decrease
+	 * for parallel incl multi-threaded, hence not applied for distributed operations (also because compression time +
+	 * garbage collection increases)
+	 */
+	public final boolean sortValuesByLength;
+
+	/**
+	 * The sampling ratio used when choosing ColGroups. Note that, default behavior is to use exact estimator if the
+	 * number of elements is below 1000.
+	 */
 	public final double samplingRatio;
 
-	// Share DDC Dictionaries between ColGroups.
-	// TODO FIX DDC Dictionarie sharing.
+	/**
+	 * Share DDC Dictionaries between ColGroups.
+	 * 
+	 * TODO Fix The DDC dictionary sharing.
+	 */
 	public final boolean allowSharedDDCDictionary;
 
-	// Transpose input matrix, to optimize performance, this reallocate the matrix to a more cache conscious allocation
-	// for iteration in columns.
+	/**
+	 * Transpose input matrix, to optimize performance, this reallocate the matrix to a more cache conscious allocation
+	 * for iteration in columns.
+	 */
 	public final boolean transposeInput;
 
-	// If the seed is -1 then the system used system millisecond time and class hash for seeding.
+	/** If the seed is -1 then the system used system millisecond time and class hash for seeding. */
 	public final int seed;
 
-	// Investigate the estimate.
+	/** Boolean specifying if the compression strategy should be investigated and monitored. */
 	public final boolean investigateEstimate;
 
+	/** True if lossy compression is enabled */
 	public final boolean lossy;
 
-	// Removed the option of LOW_LEVEL_OPT, (only effecting OLE and RLE.)
-	// public final boolean LOW_LEVEL_OPT;
+	/** The selected method for column partitioning used in CoCoding compressed columns */
+	public final PartitionerType columnPartitioner;
 
-	// Valid Compressions List, containing the ColGroup CompressionTypes that are allowed to be used for the compression
-	// Default is to always allow for Uncompromisable ColGroup.
-	public final List<CompressionType> validCompressions;
+	/** The maximum number of columns CoCoded if the Static CoCoding strategy is selected */
+	public final int maxStaticColGroupCoCode;
+
+	/**
+	 * Valid Compressions List, containing the ColGroup CompressionTypes that are allowed to be used for the compression
+	 * Default is to always allow for Uncompromisable ColGroup.
+	 */
+	public final Set<CompressionType> validCompressions;
 
 	protected CompressionSettings(double samplingRatio, boolean allowSharedDDCDictionary, boolean transposeInput,
-		int seed, boolean investigateEstimate, List<CompressionType> validCompressions) {
+		int seed, boolean investigateEstimate, boolean lossy, Set<CompressionType> validCompressions,
+		boolean sortValuesByLength, PartitionerType columnPartitioner, int maxStaticColGroupCoCode) {
 		this.samplingRatio = samplingRatio;
 		this.allowSharedDDCDictionary = allowSharedDDCDictionary;
 		this.transposeInput = transposeInput;
 		this.seed = seed;
 		this.investigateEstimate = investigateEstimate;
 		this.validCompressions = validCompressions;
-		this.lossy = validCompressions.contains(CompressionType.QUAN);
+		this.lossy = lossy;
+		this.sortValuesByLength = sortValuesByLength;
+		this.columnPartitioner = columnPartitioner;
+		this.maxStaticColGroupCoCode = maxStaticColGroupCoCode;
 	}
 
 	@Override
@@ -78,6 +101,8 @@
 		sb.append("\n" + super.toString());
 		sb.append("\n Valid Compressions: " + validCompressions);
 		sb.append("\n DDC1 share dict: " + allowSharedDDCDictionary);
+		sb.append("\n Partitioner: " + columnPartitioner);
+		sb.append("\n Lossy: " + lossy);
 		// If needed for debugging add more fields to the printing.
 		return sb.toString();
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
index 7de49c2..1abe605 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
@@ -19,67 +19,141 @@
 
 package org.apache.sysds.runtime.compress;
 
-import java.util.ArrayList;
-import java.util.List;
+import java.util.EnumSet;
 
+import org.apache.sysds.conf.ConfigurationManager;
+import org.apache.sysds.conf.DMLConfig;
+import org.apache.sysds.runtime.compress.cocode.PlanningCoCoder.PartitionerType;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
 
 /**
- * Builder pattern for Compression Settings.
- * See CompressionSettings for details on values.
+ * Builder pattern for Compression Settings. See CompressionSettings for details on values.
  */
 public class CompressionSettingsBuilder {
-	private double samplingRatio = 0.05;
-	private boolean allowSharedDDCDictionary = true;
+	private double samplingRatio = 1.0;
+	private boolean allowSharedDDCDictionary = false;
 	private boolean transposeInput = true;
 	private int seed = -1;
 	private boolean investigateEstimate = false;
-	private List<CompressionType> validCompressions = new ArrayList<>();
+	private boolean lossy = false;
+	private EnumSet<CompressionType> validCompressions;
+	private boolean sortValuesByLength = false;
+	private PartitionerType columnPartitioner = PartitionerType.STATIC; // BIN_PACKING or STATIC
+	private int maxStaticColGroupCoCode = 1;
 
 	public CompressionSettingsBuilder() {
-		validCompressions.add(CompressionType.DDC);
-		validCompressions.add(CompressionType.OLE);
-		validCompressions.add(CompressionType.RLE);
-		validCompressions.add(CompressionType.UNCOMPRESSED);
-		validCompressions.add(CompressionType.QUAN);
+
+		DMLConfig conf = ConfigurationManager.getDMLConfig();
+		this.lossy = conf.getBooleanValue(DMLConfig.COMPRESSED_LOSSY);
+		this.validCompressions = EnumSet.of(CompressionType.UNCOMPRESSED);
+		String[] validCompressionsString = conf.getTextValue(DMLConfig.COMPRESSED_VALID_COMPRESSIONS).split(",");;
+		for(String comp:  validCompressionsString){
+			validCompressions.add(CompressionType.valueOf(comp));
+		}
 	}
-	
-	public CompressionSettingsBuilder copySettings(CompressionSettings that){
+
+	/**
+	 * Copy the settings from another CompressionSettings Builder, modifies this, not that.
+	 * 
+	 * @param that The other CompressionSettingsBuilder to copy settings from.
+	 * @return The modified CompressionSettings in the same object.
+	 */
+	public CompressionSettingsBuilder copySettings(CompressionSettings that) {
 		this.samplingRatio = that.samplingRatio;
 		this.allowSharedDDCDictionary = that.allowSharedDDCDictionary;
 		this.transposeInput = that.transposeInput;
 		this.seed = that.seed;
 		this.investigateEstimate = that.investigateEstimate;
-		this.validCompressions = new ArrayList<>(that.validCompressions);
+		this.validCompressions = EnumSet.copyOf(that.validCompressions);
 		return this;
 	}
 
+	/**
+	 * Set the Compression to use Lossy compression.
+	 * 
+	 * @param lossy A boolean specifying if the compression should be lossy
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder setLossy(boolean lossy) {
+		this.lossy = lossy;
+		return this;
+	}
+
+	/**
+	 * Set the sampling ratio in percent to sample the input matrix. Input value should be in range 0.0 - 1.0
+	 * 
+	 * @param samplingRatio The ratio to sample from the input
+	 * @return The CompressionSettingsBuilder
+	 */
 	public CompressionSettingsBuilder setSamplingRatio(double samplingRatio) {
 		this.samplingRatio = samplingRatio;
 		return this;
 	}
 
+	/**
+	 * Set the sortValuesByLength flag. This sorts the dictionaries containing the data based on their occurences in the
+	 * ColGroup. Improving cache efficiency especially for diverse column groups.
+	 * 
+	 * @param sortValuesByLength A boolean specifying if the values should be sorted
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder setSortValuesByLength(boolean sortValuesByLength) {
+		this.sortValuesByLength = sortValuesByLength;
+		return this;
+	}
+
+	/**
+	 * Allow the Dictionaries to be shared between different column groups.
+	 * 
+	 * @param allowSharedDDCDictionary A boolean specifying if the dictionary can be shared between column groups.
+	 * @return The CompressionSettingsBuilder
+	 */
 	public CompressionSettingsBuilder setAllowSharedDDCDictionary(boolean allowSharedDDCDictionary) {
 		this.allowSharedDDCDictionary = allowSharedDDCDictionary;
 		return this;
 	}
 
+	/**
+	 * Specify if the input matrix should be transposed before compression. This improves cache efficiency while
+	 * compression the input matrix
+	 * 
+	 * @param transposeInput boolean specifying if the input should be transposed before compression
+	 * @return The CompressionSettingsBuilder
+	 */
 	public CompressionSettingsBuilder setTransposeInput(boolean transposeInput) {
 		this.transposeInput = transposeInput;
 		return this;
 	}
 
+	/**
+	 * Set the seed for the compression operation.
+	 * 
+	 * @param seed The seed used in sampling the matrix and general operations in the compression.
+	 * @return The CompressionSettingsBuilder
+	 */
 	public CompressionSettingsBuilder setSeed(int seed) {
 		this.seed = seed;
 		return this;
 	}
 
+	/**
+	 * Set if the compression should be investigated while compressing.
+	 * 
+	 * @param investigateEstimate A boolean specifying it the input should be estimated.
+	 * @return The CompressionSettingsBuilder
+	 */
 	public CompressionSettingsBuilder setInvestigateEstimate(boolean investigateEstimate) {
 		this.investigateEstimate = investigateEstimate;
 		return this;
 	}
 
-	public CompressionSettingsBuilder setValidCompressions(List<CompressionType> validCompressions) {
+	/**
+	 * Set the valid compression strategies used for the compression.
+	 * 
+	 * @param validCompressions An EnumSet of CompressionTypes to use in the compression
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder setValidCompressions(EnumSet<CompressionType> validCompressions) {
 		// should always contain Uncompressed as an option.
 		if(!validCompressions.contains(CompressionType.UNCOMPRESSED))
 			validCompressions.add(CompressionType.UNCOMPRESSED);
@@ -87,8 +161,59 @@
 		return this;
 	}
 
+	/**
+	 * Add a single valid compression type to the EnumSet of valid compressions.
+	 * 
+	 * @param cp The compression type to add to the valid ones.
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder addValidCompression(CompressionType cp) {
+		this.validCompressions.add(cp);
+		return this;
+	}
+
+	/**
+	 * Clear all the compression types allowed in the compression. This will only allow the Uncompressed ColGroup type.
+	 * Since this is required for operation of the compression
+	 * 
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder clearValidCompression() {
+		this.validCompressions = EnumSet.of(CompressionType.UNCOMPRESSED);
+		return this;
+	}
+
+	/**
+	 * Set the type of CoCoding Partitioner type to use for combining columns together.
+	 * 
+	 * @param columnPartitioner The Strategy to select from PartitionerType
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder setColumnPartitioner(PartitionerType columnPartitioner) {
+		this.columnPartitioner = columnPartitioner;
+		return this;
+	}
+
+	/**
+	 * Set the maximum number of columns to CoCode together in the static CoCoding strategy. Compression time increase
+	 * with higher numbers.
+	 * 
+	 * @param maxStaticColGroupCoCode The max selected.
+	 * @return The CompressionSettingsBuilder
+	 */
+	public CompressionSettingsBuilder setmaxStaticColGroupCoCode(int maxStaticColGroupCoCode) {
+		this.maxStaticColGroupCoCode = maxStaticColGroupCoCode;
+		return this;
+	}
+
+	/**
+	 * Create the CompressionSettings object to use in the compression.
+	 * 
+	 * @return The CompressionSettings
+	 */
 	public CompressionSettings create() {
 		return new CompressionSettings(samplingRatio, allowSharedDDCDictionary, transposeInput, seed,
-			investigateEstimate, validCompressions);
+			investigateEstimate, lossy, validCompressions, sortValuesByLength, columnPartitioner,
+			maxStaticColGroupCoCode);
 	}
 }
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java
index aa831d7..fc53dd1 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java
@@ -110,22 +110,4 @@
 		return sb.toString();
 	}
 
-	public static long getSizeInMemory() {
-		long total = 16; // header
-		total += 8; // compression ratio
-		total += 8; // original size
-		total += 8; // estimated size col groups
-		total += 8; // estimated size cols
-		total += 8; // actual size
-
-		total += 8; // Array list Time phases
-		total += 8; // Map colGroup Counts
-
-		// TODO what happens if we scale number of col Groups...
-		// TODO Reduce memory usage for compression statistics.
-		total += 64; // HashMap col Groups.
-		total += 40; // ArrayList time phases
-
-		return total;
-	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelection.java b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelection.java
index 7f064ac..60d9c5b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelection.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelection.java
@@ -28,15 +28,15 @@
 	protected int[] _colIndexes = null;
 	protected int _numRows = -1;
 	protected int _lastRow = -1;
-	protected boolean _skipZeros = false;
+	// protected boolean _skipZeros = false;
 
 	protected CompressionSettings _compSettings;
 
-	protected ReaderColumnSelection(int[] colIndexes, int numRows, boolean skipZeros, CompressionSettings compSettings) {
+	protected ReaderColumnSelection(int[] colIndexes, int numRows, CompressionSettings compSettings) {
 		_colIndexes = colIndexes;
 		_numRows = numRows;
 		_lastRow = -1;
-		_skipZeros = skipZeros;
+		// _skipZeros = skipZeros;
 		_compSettings = compSettings;
 	}
 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDense.java b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDense.java
index 76ef66f..cae285f 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDense.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDense.java
@@ -30,8 +30,8 @@
 	private DblArray reusableReturn;
 	private double[] reusableArr;
 
-	public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, boolean skipZeros, CompressionSettings compSettings) {
-		super(colIndices, compSettings.transposeInput ? data.getNumColumns() : data.getNumRows(), skipZeros, compSettings);
+	public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, CompressionSettings compSettings) {
+		super(colIndices, compSettings.transposeInput ? data.getNumColumns() : data.getNumRows(), compSettings);
 		_data = data;
 		reusableArr = new double[colIndices.length];
 		reusableReturn = new DblArray(reusableArr);
@@ -39,14 +39,14 @@
 
 	@Override
 	public DblArray nextRow() {
-		if(_skipZeros) {
-			while((nonZeroReturn = getNextRow()) != null && DblArray.isZero(nonZeroReturn)) {
-			}
-			return nonZeroReturn;
+		// if(_skipZeros) {
+		while((nonZeroReturn = getNextRow()) != null && DblArray.isZero(nonZeroReturn)) {
 		}
-		else {
-			return getNextRow();
-		}
+		return nonZeroReturn;
+		// }
+		// else {
+		// return getNextRow();
+		// }
 	}
 
 	private DblArray getNextRow() {
@@ -54,8 +54,8 @@
 			return null;
 		_lastRow++;
 		for(int i = 0; i < _colIndexes.length; i++) {
-			reusableArr[i] = _compSettings.transposeInput ? _data.quickGetValue(_colIndexes[i],
-				_lastRow) : _data.quickGetValue(_lastRow, _colIndexes[i]);
+			reusableArr[i] = _compSettings.transposeInput ? _data.quickGetValue(_colIndexes[i], _lastRow) : _data
+				.quickGetValue(_lastRow, _colIndexes[i]);
 		}
 		return reusableReturn;
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDenseSample.java b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDenseSample.java
index 7fd4b72..2ab76ce 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDenseSample.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionDenseSample.java
@@ -38,8 +38,8 @@
 	private double[] reusableArr;
 
 	public ReaderColumnSelectionDenseSample(MatrixBlock data, int[] colIndexes, int[] sampleIndexes,
-		boolean skipZeros, CompressionSettings compSettings) {
-		super(colIndexes, -1, skipZeros, compSettings);
+		 CompressionSettings compSettings) {
+		super(colIndexes, -1,  compSettings);
 		_data = data;
 		_sampleIndexes = sampleIndexes;
 		reusableArr = new double[colIndexes.length];
@@ -48,14 +48,14 @@
 
 	@Override
 	public DblArray nextRow() {
-		if(_skipZeros) {
+		// if(_skipZeros) {
 			while((nonZeroReturn = getNextRow()) != null && DblArray.isZero(nonZeroReturn)) {
 			}
 			return nonZeroReturn;
-		}
-		else {
-			return getNextRow();
-		}
+		// }
+		// else {
+			// return getNextRow();
+		// }
 	}
 
 	private DblArray getNextRow() {
diff --git a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionSparse.java b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionSparse.java
index abdb723..ddf124c 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionSparse.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/ReaderColumnSelectionSparse.java
@@ -43,8 +43,8 @@
 	private SparseRow[] sparseCols = null;
 	private int[] sparsePos = null;
 
-	public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, boolean skipZeros, CompressionSettings compSettings) {
-		super(colIndexes, compSettings.transposeInput ? data.getNumColumns() : data.getNumRows(), skipZeros, compSettings);
+	public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, CompressionSettings compSettings) {
+		super(colIndexes, compSettings.transposeInput ? data.getNumColumns() : data.getNumRows(), compSettings);
 		ZERO_DBL_ARRAY = new DblArray(new double[colIndexes.length], true);
 		reusableArr = new double[colIndexes.length];
 		reusableReturn = new DblArray(reusableArr);
@@ -62,14 +62,14 @@
 
 	@Override
 	public DblArray nextRow() {
-		if(_skipZeros) {
+		// if(_skipZeros) {
 			while((nonZeroReturn = getNextRow()) != null && nonZeroReturn == ZERO_DBL_ARRAY) {
 			}
 			return nonZeroReturn;
-		}
-		else {
-			return getNextRow();
-		}
+		// }
+		// else {
+			// return getNextRow();
+		// }
 	}
 
 	private DblArray getNextRow() {
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitioner.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitioner.java
index 6abf874..65124b5 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitioner.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitioner.java
@@ -22,6 +22,7 @@
 import java.util.HashMap;
 import java.util.List;
 
+import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
 
 public abstract class ColumnGroupPartitioner {
@@ -31,8 +32,9 @@
 	 * 
 	 * @param groupCols     list of columns
 	 * @param groupColsInfo list of column infos
+	 * @param cs            The Compression settings used for the compression
 	 * @return list of partitions (where each partition is a list of columns)
 	 */
 	public abstract List<int[]> partitionColumns(List<Integer> groupCols,
-		HashMap<Integer, GroupableColInfo> groupColsInfo);
+		HashMap<Integer, GroupableColInfo> groupColsInfo, CompressionSettings cs);
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java
index 435a1fb..776dd50 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerBinPacking.java
@@ -26,6 +26,7 @@
 import java.util.stream.Collectors;
 
 import org.apache.commons.lang.ArrayUtils;
+import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
 import org.apache.sysds.runtime.compress.utils.IntArrayList;
 import org.apache.sysds.runtime.util.SortUtils;
@@ -43,7 +44,8 @@
 	public static double BIN_CAPACITY = 0.000032; // higher values, more grouping
 
 	@Override
-	public List<int[]> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo) {
+	public List<int[]> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo,
+		CompressionSettings cs) {
 		// obtain column weights
 		int[] items = new int[groupCols.size()];
 		double[] itemWeights = new double[groupCols.size()];
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerStatic.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerStatic.java
index eb5fab6..241e5d4 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerStatic.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/ColumnGroupPartitionerStatic.java
@@ -23,18 +23,19 @@
 import java.util.HashMap;
 import java.util.List;
 
+import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.cocode.PlanningCoCoder.GroupableColInfo;
 
 /**
  * Column group partitioning with static distribution heuristic.
  */
 public class ColumnGroupPartitionerStatic extends ColumnGroupPartitioner {
-	private static final int MAX_COL_PER_GROUP = 20;
 
 	@Override
-	public List<int[]> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo) {
+	public List<int[]> partitionColumns(List<Integer> groupCols, HashMap<Integer, GroupableColInfo> groupColsInfo,
+		CompressionSettings cs) {
 		List<int[]> ret = new ArrayList<>();
-		int numParts = (int) Math.ceil((double) groupCols.size() / MAX_COL_PER_GROUP);
+		int numParts = (int) Math.ceil((double) groupCols.size() / cs.maxStaticColGroupCoCode);
 		int partSize = (int) Math.ceil((double) groupCols.size() / numParts);
 		for(int i = 0, pos = 0; i < numParts; i++, pos += partSize) {
 			int[] tmp = new int[Math.min(partSize, groupCols.size() - pos)];
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/PlanningCoCoder.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/PlanningCoCoder.java
index 080bbcc..c239f7b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/PlanningCoCoder.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/PlanningCoCoder.java
@@ -30,14 +30,13 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
 import org.apache.sysds.runtime.util.CommonThreadPool;
 
 public class PlanningCoCoder {
-	// internal configurations
-	private final static PartitionerType COLUMN_PARTITIONER = PartitionerType.BIN_PACKING;
 
 	private static final Log LOG = LogFactory.getLog(PlanningCoCoder.class.getName());
 
@@ -54,10 +53,11 @@
 	 * @param colInfos      The information already gathered on the individual ColGroups of columns.
 	 * @param numRows       The number of rows in the input matrix.
 	 * @param k             The concurrency degree allowed for this operation.
+	 * @param cs            The Compression Settings used in the compression.
 	 * @return The Estimated (hopefully) best groups of ColGroups.
 	 */
-	public static List<int[]> findCocodesByPartitioning(CompressedSizeEstimator sizeEstimator,
-		CompressedSizeInfo colInfos, int numRows, int k) {
+	public static List<int[]> findCoCodesByPartitioning(CompressedSizeEstimator sizeEstimator,
+		CompressedSizeInfo colInfos, int numRows, int k, CompressionSettings cs) {
 		// filtering out non-group-able columns as singleton groups
 		// weight is the ratio of its cardinality to the number of rows
 
@@ -76,7 +76,8 @@
 		}
 
 		// use column group partitioner to create partitions of columns
-		List<int[]> bins = createColumnGroupPartitioner(COLUMN_PARTITIONER).partitionColumns(groupCols, groupColsInfo);
+		List<int[]> bins = createColumnGroupPartitioner(cs.columnPartitioner)
+			.partitionColumns(groupCols, groupColsInfo, cs);
 
 		// brute force grouping within each partition
 		return (k > 1) ? getCocodingGroupsBruteForce(bins,
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroup.java
index d0e269f..582e769 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroup.java
@@ -34,8 +34,8 @@
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 
 /**
- * Class that stores information about a column group within a compressed matrix
- * block. There are subclasses specific to each compression type.
+ * Class that stores information about a column group within a compressed matrix block. There are subclasses specific to
+ * each compression type.
  */
 public abstract class ColGroup implements Serializable {
 	protected static final Log LOG = LogFactory.getLog(ColGroup.class.getName());
@@ -44,8 +44,7 @@
 	/**
 	 * Public Group types supported
 	 * 
-	 * Note For instance DDC is called DDC not DDC1, or DDC2 which is a specific
-	 * subtype of the DDC.
+	 * Note For instance DDC is called DDC not DDC1, or DDC2 which is a specific subtype of the DDC.
 	 */
 	public enum CompressionType {
 		UNCOMPRESSED, // uncompressed sparse/dense
@@ -58,8 +57,7 @@
 	/**
 	 * Concrete ColGroupType
 	 * 
-	 * Protected such that outside the ColGroup package it should be unknown which
-	 * specific subtype is used.
+	 * Protected such that outside the ColGroup package it should be unknown which specific subtype is used.
 	 */
 	protected enum ColGroupType {
 		UNCOMPRESSED, // uncompressed sparse/dense
@@ -73,8 +71,12 @@
 	/** The ColGroup Indexes 0 offset, contained in the ColGroup */
 	protected int[] _colIndexes;
 
-	/** ColGroup Implementation Contains zero values */
+	/**
+	 * ColGroup Implementation Contains zero values NOTE This variable is moved here becuse that reduce the Object size
+	 * by 8
+	 */
 	protected boolean _zeros;
+	protected boolean _lossy;
 
 	/** Number of rows in the matrix, for use by child classes. */
 	protected int _numRows;
@@ -90,18 +92,17 @@
 	/**
 	 * Main constructor.
 	 * 
-	 * @param colIndices offsets of the columns in the matrix block that make up the
-	 *                   group
+	 * @param colIndices offsets of the columns in the matrix block that make up the group
 	 * @param numRows    total number of rows in the block
 	 */
 	protected ColGroup(int[] colIndices, int numRows) {
-		if (colIndices == null) {
+		if(colIndices == null) {
 			throw new DMLRuntimeException("null input to ColGroup is invalid");
 		}
-		if (colIndices.length == 0) {
+		if(colIndices.length == 0) {
 			throw new DMLRuntimeException("0 is an invalid number of columns in a ColGroup");
 		}
-		if (numRows < 1) {
+		if(numRows < 1) {
 			throw new DMLRuntimeException(numRows + " is an invalid number of rows in a ColGroup");
 		}
 		_colIndexes = colIndices;
@@ -153,35 +154,29 @@
 	public abstract CompressionType getCompType();
 
 	/**
-	 * Internally get the specific type of ColGroup, this could be extracted from
-	 * the object but that does not allow for nice switches in the code.
+	 * Internally get the specific type of ColGroup, this could be extracted from the object but that does not allow for
+	 * nice switches in the code.
 	 * 
 	 * @return ColGroupType of the object.
 	 */
 	protected abstract ColGroupType getColGroupType();
 
 	public void shiftColIndices(int offset) {
-		for (int i = 0; i < _colIndexes.length; i++)
+		for(int i = 0; i < _colIndexes.length; i++)
 			_colIndexes[i] += offset;
 	}
 
 	/**
-	 * Note: Must be overridden by child classes to account for additional data and
-	 * metadata
+	 * Note: Must be overridden by child classes to account for additional data and metadata
 	 * 
-	 * @return an upper bound on the number of bytes used to store this ColGroup in
-	 *         memory.
+	 * @return an upper bound on the number of bytes used to store this ColGroup in memory.
 	 */
-	public long estimateInMemorySize() {
-		return ColGroupSizes.estimateInMemorySizeGroup(_colIndexes.length);
-	}
+	public abstract long estimateInMemorySize();
 
 	/**
-	 * Decompress the contents of this column group into the specified full matrix
-	 * block.
+	 * Decompress the contents of this column group into the specified full matrix block.
 	 * 
-	 * @param target a matrix block where the columns covered by this column group
-	 *               have not yet been filled in.
+	 * @param target a matrix block where the columns covered by this column group have not yet been filled in.
 	 * @param rl     row lower
 	 * @param ru     row upper
 	 */
@@ -190,10 +185,9 @@
 	/**
 	 * Decompress the contents of this column group into uncompressed packed columns
 	 * 
-	 * @param target          a dense matrix block. The block must have enough space
-	 *                        to hold the contents of this column group.
-	 * @param colIndexTargets array that maps column indices in the original matrix
-	 *                        block to columns of target.
+	 * @param target          a dense matrix block. The block must have enough space to hold the contents of this column
+	 *                        group.
+	 * @param colIndexTargets array that maps column indices in the original matrix block to columns of target.
 	 */
 	public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets);
 
@@ -232,20 +226,19 @@
 	 */
 	public abstract void readFields(DataInput in) throws IOException;
 
-	/**
-	 * Deserializes column group from data input.
-	 * 
-	 * @param in       data input
-	 * @param skipDict skip shared dictionary
-	 * @throws IOException if IOException occurs
-	 */
-	public void readFields(DataInput in, boolean skipDict) throws IOException {
-		readFields(in); // skipDict ignored by default
-	}
+	// /**
+	//  * Deserializes column group from data input.
+	//  * 
+	//  * @param in       data input
+	//  * @param skipDict skip shared dictionary
+	//  * @throws IOException if IOException occurs
+	//  */
+	// public void readFields(DataInput in, boolean skipDict) throws IOException {
+	// 	readFields(in); // skipDict ignored by default
+	// }
 
 	/**
-	 * Returns the exact serialized size of column group. This can be used for
-	 * example for buffer preallocation.
+	 * Returns the exact serialized size of column group. This can be used for example for buffer preallocation.
 	 * 
 	 * @return exact serialized size for column group
 	 */
@@ -261,64 +254,67 @@
 	public abstract double get(int r, int c);
 
 	/**
-	 * Multiply the slice of the matrix that this column group represents by a
-	 * vector on the right. Get the number of values. contained inside the ColGroup.
+	 * Multiply the slice of the matrix that this column group represents by a vector on the right. Get the number of
+	 * values. contained inside the ColGroup.
 	 * 
 	 * @return value at the row/column position
 	 */
 	// public abstract long getValuesSize();
 
 	/**
-	 * Returns the ColGroup as a MatrixBlock. Used as a fall back solution in case a
-	 * operation is not supported. Use in connection to getIfCountsType to get if
-	 * the values are repeated.
+	 * Get all the values in the colGroup. Note that this is only the stored values not the way they are stored. Making
+	 * the output a list of values used in that colGroup not the actual full column.
+	 * 
+	 * @return a double list of values.
+	 */
+	public abstract double[] getValues();
+
+	/**
+	 * Returns the ColGroup as a MatrixBlock. Used as a fall back solution in case a operation is not supported. Use in
+	 * connection to getIfCountsType to get if the values are repeated.
 	 * 
 	 * @return Matrix Block of the contained Values. Possibly contained in groups.
 	 */
 	public abstract MatrixBlock getValuesAsBlock();
 
 	/**
-	 * Returns true if in the getValuesAsBlock method returns values in groups (that
-	 * needs to be counted) or individually potentially repeated values
+	 * Returns true if in the getValuesAsBlock method returns values in groups (that needs to be counted) or
+	 * individually potentially repeated values
 	 * 
 	 * @return boolean
 	 */
 	public abstract boolean getIfCountsType();
 
 	/**
-	 * Returns the counts of values inside the MatrixBlock returned in
-	 * getValuesAsBlock Throws an exception if the getIfCountsType is false
+	 * Returns the counts of values inside the MatrixBlock returned in getValuesAsBlock Throws an exception if the
+	 * getIfCountsType is false
 	 * 
 	 * @return the count of each value in the MatrixBlock.
 	 */
 	public abstract int[] getCounts();
 
 	/**
-	 * Returns the counts of values inside the MatrixBlock returned in
-	 * getValuesAsBlock Throws an exception if the getIfCountsType is false
+	 * Returns the counts of values inside the MatrixBlock returned in getValuesAsBlock Throws an exception if the
+	 * getIfCountsType is false
 	 * 
-	 * @param includeZero Boolean to specify if zero should be included in the
-	 *                    count.
+	 * @param includeZero Boolean to specify if zero should be included in the count.
 	 * @return the count of each value in the MatrixBlock.
 	 */
-	public abstract int[] getCounts(boolean includeZero);
+	// public abstract int[] getCounts(boolean includeZero);
 
 	/**
-	 * Multiply the slice of the matrix that this column group represents by a
-	 * vector on the right.
+	 * Multiply the slice of the matrix that this column group represents by a vector on the right.
 	 * 
 	 * @param vector vector to multiply by (tall vector)
 	 * @param result accumulator for holding the result
 	 * @param rl     row lower
-	 * @param ru     row upper if the internal SystemML code that performs the
-	 *               multiplication experiences an error
+	 * @param ru     row upper if the internal SystemML code that performs the multiplication experiences an error
 	 */
 	public abstract void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru);
 
 	/**
-	 * Multiply the slice of the matrix that this column group represents by a row
-	 * vector on the left (the original column vector is assumed to be transposed
-	 * already i.e. its size now is 1xn).
+	 * Multiply the slice of the matrix that this column group represents by a row vector on the left (the original
+	 * column vector is assumed to be transposed already i.e. its size now is 1xn).
 	 * 
 	 * @param vector row vector
 	 * @param result matrix block result
@@ -326,11 +322,11 @@
 	public abstract void leftMultByRowVector(MatrixBlock vector, MatrixBlock result);
 
 	// additional vector-matrix multiplication to avoid DDC uncompression
-	public abstract void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result);
+	// public abstract void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result);
 
 	/**
-	 * Perform the specified scalar operation directly on the compressed column
-	 * group, without decompressing individual cells if possible.
+	 * Perform the specified scalar operation directly on the compressed column group, without decompressing individual
+	 * cells if possible.
 	 * 
 	 * @param op operation to perform
 	 * @return version of this column group with the operation applied
@@ -338,8 +334,8 @@
 	public abstract ColGroup scalarOperation(ScalarOperator op);
 
 	/**
-	 * Unary Aggregate operator, since aggregate operators require new object
-	 * output, the output becomes an uncompressed matrix.
+	 * Unary Aggregate operator, since aggregate operators require new object output, the output becomes an uncompressed
+	 * matrix.
 	 * 
 	 * @param op     The operator used
 	 * @param result Rhe output matrix block.
@@ -347,8 +343,8 @@
 	public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result);
 
 	/**
-	 * Unary Aggregate operator, since aggregate operators require new object
-	 * output, the output becomes an uncompressed matrix.
+	 * Unary Aggregate operator, since aggregate operators require new object output, the output becomes an uncompressed
+	 * matrix.
 	 * 
 	 * @param op     The operator used
 	 * @param result The output matrix block.
@@ -369,8 +365,8 @@
 	public abstract Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor);
 
 	/**
-	 * Create a dense row iterator for a row index range. This iterator implies the
-	 * inclusion of zeros and row-major iteration order.
+	 * Create a dense row iterator for a row index range. This iterator implies the inclusion of zeros and row-major
+	 * iteration order.
 	 * 
 	 * @param rl row lower index, inclusive
 	 * @param ru row upper index, exclusive
@@ -388,10 +384,17 @@
 	public abstract void countNonZerosPerRow(int[] rnnz, int rl, int ru);
 
 	/**
-	 * Base class for column group row iterators. We do not implement the default
-	 * Iterator interface in order to avoid unnecessary value copies per group.
+	 * Base class for column group row iterators. We do not implement the default Iterator interface in order to avoid
+	 * unnecessary value copies per group.
 	 */
 	protected abstract class ColGroupRowIterator {
 		public abstract void next(double[] buff, int rowIx, int segIx, boolean last);
 	}
+
+	/**
+	 * Is Lossy
+	 * @return returns if the ColGroup is compressed in a lossy manner.
+	 */
+	public abstract boolean isLossy();
+
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
index 0f975d5..993cff7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
@@ -23,9 +23,13 @@
 import java.util.Iterator;
 
 import org.apache.commons.lang.NotImplementedException;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
+import org.apache.sysds.runtime.data.DenseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.KahanFunction;
+import org.apache.sysds.runtime.functionobjects.KahanPlus;
+import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
 import org.apache.sysds.runtime.instructions.cp.KahanObject;
 import org.apache.sysds.runtime.matrix.data.IJV;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -48,8 +52,8 @@
 		super();
 	}
 
-	protected ColGroupDDC(int[] colIndices, int numRows, UncompressedBitmap ubm) {
-		super(colIndices, numRows, ubm);
+	protected ColGroupDDC(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
+		super(colIndices, numRows, ubm, cs);
 	}
 
 	protected ColGroupDDC(int[] colIndices, int numRows, double[] values) {
@@ -58,10 +62,11 @@
 
 	@Override
 	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
+		double[] dictionary = getValues();
 		for(int i = rl; i < ru; i++) {
 			for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				int col = _colIndexes[colIx];
-				double cellVal = getData(i, colIx);
+				double cellVal = getData(i, colIx, dictionary);
 				target.quickSetValue(i, col, cellVal);
 			}
 		}
@@ -71,11 +76,12 @@
 	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
 		int nrow = getNumRows();
 		int ncol = getNumCols();
+		double[] dictionary = getValues();
 		for(int i = 0; i < nrow; i++) {
 			for(int colIx = 0; colIx < ncol; colIx++) {
 				int origMatrixColIx = getColIndex(colIx);
 				int col = colIndexTargets[origMatrixColIx];
-				double cellVal = getData(i, colIx);
+				double cellVal = getData(i, colIx, dictionary);
 				target.quickSetValue(i, col, cellVal);
 			}
 		}
@@ -86,8 +92,8 @@
 		throw new NotImplementedException("Old Function Not In use");
 		// int nrow = getNumRows();
 		// for(int i = 0; i < nrow; i++) {
-		// 	double cellVal = getData(i, colpos);
-		// 	target.quickSetValue(i, 0, cellVal);
+		// double cellVal = getData(i, colpos);
+		// target.quickSetValue(i, 0, cellVal);
 		// }
 	}
 
@@ -99,7 +105,7 @@
 			throw new RuntimeException("Column index " + c + " not in DDC group.");
 
 		// get value
-		return getData(r, ix);
+		return _dict.getValue(getIndex(r, ix));
 	}
 
 	@Override
@@ -108,36 +114,60 @@
 		for(int i = rl; i < ru; i++) {
 			int lnnz = 0;
 			for(int colIx = 0; colIx < ncol; colIx++)
-				lnnz += (getData(i, colIx) != 0) ? 1 : 0;
+				lnnz += (_dict.getValue(getIndex(i, colIx)) != 0) ? 1 : 0;
 			rnnz[i - rl] += lnnz;
 		}
 	}
 
-
+	@Override
 	protected void computeSum(MatrixBlock result, KahanFunction kplus) {
-		int nrow = getNumRows();
-		int ncol = getNumCols();
-		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+		final int ncol = getNumCols();
+		final int numVals = getNumValues();
 
-		for(int i = 0; i < nrow; i++)
-			for(int j = 0; j < ncol; j++)
-				kplus.execute2(kbuff, getData(i, j));
+		// if(numVals < MAX_TMP_VALS) {
+		// iterative over codes and count per code
 
-		result.quickSetValue(0, 0, kbuff._sum);
-		result.quickSetValue(0, 1, kbuff._correction);
+		final int[] counts = getCounts();
+		if(_dict instanceof QDictionary && !(kplus instanceof KahanPlusSq)) {
+			final QDictionary values = ((QDictionary) _dict);
+			long sum = 0;
+			for(int k = 0, valOff = 0; k < numVals; k++, valOff += ncol) {
+				int cntk = counts[k];
+				for(int j = 0; j < ncol; j++)
+					sum += values.getValueByte(valOff + j) * cntk;
+			}
+			result.quickSetValue(0, 0, result.quickGetValue(0, 0) + sum * values._scale);
+			result.quickSetValue(0, 1, 0);
+		}
+		else {
+			double[] values = getValues();
+			// post-scaling of pre-aggregate with distinct values
+			KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+			for(int k = 0, valOff = 0; k < numVals; k++, valOff += ncol) {
+				int cntk = counts[k];
+				for(int j = 0; j < ncol; j++)
+					kplus.execute3(kbuff, values[valOff + j], cntk);
+			}
+			result.quickSetValue(0, 0, kbuff._sum);
+			result.quickSetValue(0, 1, kbuff._correction);
+		}
 	}
 
 	protected void computeColSums(MatrixBlock result, KahanFunction kplus) {
 		int nrow = getNumRows();
 		int ncol = getNumCols();
+		double[] values = _dict.getValues();
+
 		KahanObject[] kbuff = new KahanObject[getNumCols()];
 		for(int j = 0; j < ncol; j++)
 			kbuff[j] = new KahanObject(result.quickGetValue(0, _colIndexes[j]),
 				result.quickGetValue(1, _colIndexes[j]));
 
-		for(int i = 0; i < nrow; i++)
+		for(int i = 0; i < nrow; i++) {
+			int rowIndex = getIndex(i);
 			for(int j = 0; j < ncol; j++)
-				kplus.execute2(kbuff[j], getData(i, j));
+				kplus.execute2(kbuff[j], values[rowIndex + j]);
+		}
 
 		for(int j = 0; j < ncol; j++) {
 			result.quickSetValue(0, _colIndexes[j], kbuff[j]._sum);
@@ -145,26 +175,74 @@
 		}
 	}
 
-	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
-		int ncol = getNumCols();
-		KahanObject kbuff = new KahanObject(0, 0);
+	// protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+	// int ncol = getNumCols();
+	// KahanObject kbuff = new KahanObject(0, 0);
+	// double[] values = getValues();
+	// for(int i = rl; i < ru; i++) {
+	// kbuff.set(result.quickGetValue(i, 0), result.quickGetValue(i, 1));
+	// int rowIndex = getIndex(i);
+	// for(int j = 0; j < ncol; j++)
+	// kplus.execute2(kbuff, values[rowIndex + j]);
+	// result.quickSetValue(i, 0, kbuff._sum);
+	// result.quickSetValue(i, 1, kbuff._correction);
+	// }
+	// }
 
-		for(int i = rl; i < ru; i++) {
-			kbuff.set(result.quickGetValue(i, 0), result.quickGetValue(i, 1));
-			for(int j = 0; j < ncol; j++)
-				kplus.execute2(kbuff, getData(i, j));
-			result.quickSetValue(i, 0, kbuff._sum);
-			result.quickSetValue(i, 1, kbuff._correction);
+	@Override
+	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+		// note: due to corrections the output might be a large dense block
+		DenseBlock c = result.getDenseBlock();
+
+		if(_dict instanceof QDictionary && !(kplus instanceof KahanPlusSq)) {
+			final QDictionary qDict = ((QDictionary) _dict);
+			if(_colIndexes.length == 1) {
+				byte[] vals = qDict._values;
+				for(int i = rl; i < ru; i++) {
+					double[] cvals = c.values(i);
+					int cix = c.pos(i);
+					cvals[cix] = cvals[cix] + vals[getIndex(i)] * qDict._scale;
+				}
+			}
+			else {
+				short[] vals = qDict.sumAllRowsToShort(_colIndexes.length);
+				for(int i = rl; i < ru; i++) {
+					double[] cvals = c.values(i);
+					int cix = c.pos(i);
+					cvals[cix] = cvals[cix] + vals[getIndex(i)] * qDict._scale;
+				}
+			}
+		}
+		else {
+			KahanObject kbuff = new KahanObject(0, 0);
+			KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+			// pre-aggregate nnz per value tuple
+			double[] vals = _dict.sumAllRowsToDouble(kplus, kbuff, _colIndexes.length, false);
+
+			// scan data and add to result (use kahan plus not general KahanFunction
+			// for correctness in case of sqk+)
+			for(int i = rl; i < ru; i++) {
+				double[] cvals = c.values(i);
+				int cix = c.pos(i);
+				kbuff.set(cvals[cix], cvals[cix + 1]);
+				kplus2.execute2(kbuff, vals[getIndex(i)]);
+				cvals[cix] = kbuff._sum;
+				cvals[cix + 1] = kbuff._correction;
+			}
+
 		}
 	}
 
 	protected void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
 		double[] c = result.getDenseBlockValues();
 		int ncol = getNumCols();
+		double[] dictionary = getValues();
 
-		for(int i = rl; i < ru; i++)
+		for(int i = rl; i < ru; i++) {
+			int rowIndex = getIndex(i);
 			for(int j = 0; j < ncol; j++)
-				c[i] = builtin.execute(c[i], getData(i, j));
+				c[i] = builtin.execute(c[i], dictionary[rowIndex + j]);
+		}
 	}
 
 	protected final void postScaling(double[] vals, double[] c) {
@@ -182,21 +260,40 @@
 	}
 
 	/**
+	 * Generic get index in dictionary for value at row position.
+	 * 
+	 * @param r row position to get dictionary index for.
+	 * @return The dictionary index
+	 */
+	protected abstract int getIndex(int r);
+
+	/**
+	 * Generic get index in dictionary for value at row, col position. If used consider changing to getIndex and
+	 * precalculate offset to row
+	 * 
+	 * @param r     The row to find
+	 * @param colIx the col index to find
+	 * @return the index in the dictionary containing the specified value
+	 */
+	protected abstract int getIndex(int r, int colIx);
+
+	/**
 	 * Generic get value for byte-length-agnostic access to first column.
 	 * 
 	 * @param r global row index
 	 * @return value
 	 */
-	protected abstract double getData(int r);
+	protected abstract double getData(int r, double[] dictionary);
 
 	/**
 	 * Generic get value for byte-length-agnostic access.
 	 * 
-	 * @param r     global row index
-	 * @param colIx local column index
+	 * @param r          global row index
+	 * @param colIx      local column index
+	 * @param dictionary The values contained in the column groups dictionary
 	 * @return value
 	 */
-	protected abstract double getData(int r, int colIx);
+	protected abstract double getData(int r, int colIx, double[] dictionary);
 
 	/**
 	 * Generic set value for byte-length-agnostic write of encoded value.
@@ -209,11 +306,6 @@
 	protected abstract int getCode(int r);
 
 	@Override
-	public long estimateInMemorySize() {
-		return ColGroupSizes.estimateInMemorySizeDDC(getNumCols(), getNumValues());
-	}
-
-	@Override
 	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
 		// DDC iterator is always row major, so no need for custom handling
 		return new DDCIterator(rl, ru, inclZeros);
@@ -262,7 +354,7 @@
 				_cpos = nextRow ? 0 : _cpos + 1;
 				if(_rpos >= _ru)
 					return; // reached end
-				_value = getData(_rpos, _cpos);
+				_value = _dict.getValue(getIndex(_rpos, _cpos));
 			}
 			while(!_inclZeros && _value == 0);
 		}
@@ -290,4 +382,5 @@
 		sb.append(super.toString());
 		return sb.toString();
 	}
+
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC1.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC1.java
index f29f740..e4c579f 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC1.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC1.java
@@ -25,11 +25,8 @@
 import java.util.Arrays;
 import java.util.HashMap;
 
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
-import org.apache.sysds.runtime.data.DenseBlock;
-import org.apache.sysds.runtime.functionobjects.KahanFunction;
-import org.apache.sysds.runtime.functionobjects.KahanPlus;
-import org.apache.sysds.runtime.instructions.cp.KahanObject;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 
@@ -46,12 +43,12 @@
 		super();
 	}
 
-	protected ColGroupDDC1(int[] colIndices, int numRows, UncompressedBitmap ubm) {
-		super(colIndices, numRows, ubm);
+	protected ColGroupDDC1(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
+		super(colIndices, numRows, ubm, cs);
 
 		int numVals = ubm.getNumValues();
 		int numCols = ubm.getNumColumns();
-		
+
 		_data = new byte[numRows];
 
 		// materialize zero values, if necessary
@@ -59,8 +56,7 @@
 			int zeroIx = containsAllZeroValue();
 			if(zeroIx < 0) {
 				zeroIx = numVals;
-				_dict = new Dictionary(Arrays.copyOf(
-					_dict.getValues(), _dict.getValues().length + numCols));
+				_dict = IDictionary.materializeZeroValue(_dict, numCols);
 			}
 			Arrays.fill(_data, (byte) zeroIx);
 		}
@@ -80,9 +76,8 @@
 		_data = data;
 	}
 
-	
 	@Override
-	protected ColGroupType getColGroupType(){
+	protected ColGroupType getColGroupType() {
 		return ColGroupType.DDC1;
 	}
 
@@ -98,13 +93,23 @@
 	}
 
 	@Override
-	protected double getData(int r) {
-		return _dict.getValue(_data[r] & 0xFF);
+	protected int getIndex(int r) {
+		return _data[r] & 0xFF;
 	}
 
 	@Override
-	protected double getData(int r, int colIx) {
-		return _dict.getValue((_data[r] & 0xFF) * getNumCols() + colIx);
+	protected int getIndex(int r, int colIx) {
+		return _data[r] & 0xFF * getNumCols() + colIx;
+	}
+
+	@Override
+	protected double getData(int r, double[] dictionary) {
+		return dictionary[_data[r] & 0xFF];
+	}
+
+	@Override
+	protected double getData(int r, int colIx, double[] values) {
+		return values[(_data[r] & 0xFF) * getNumCols() + colIx];
 	}
 
 	@Override
@@ -132,57 +137,16 @@
 
 	@Override
 	public void write(DataOutput out) throws IOException {
-		write(out, false);
-	}
-
-	@Override
-	public void write(DataOutput out, boolean skipDict) throws IOException {
-		int numCols = getNumCols();
-		int numVals = getNumValues();
-		out.writeInt(_numRows);
-		out.writeInt(numCols);
-		out.writeInt(numVals);
-
-		// write col indices
-		for(int i = 0; i < _colIndexes.length; i++)
-			out.writeInt(_colIndexes[i]);
-
-		// write distinct values
-		if(!skipDict) {
-			final double[] values = getValues();
-			for(int i = 0; i < numCols*numVals; i++)
-				out.writeDouble(values[i]);
-		}
-
+		super.write(out);
 		// write data
+		// out.writeInt(_numRows);
 		for(int i = 0; i < _numRows; i++)
 			out.writeByte(_data[i]);
 	}
 
 	@Override
 	public void readFields(DataInput in) throws IOException {
-		readFields(in, false);
-	}
-
-	@Override
-	public void readFields(DataInput in, boolean skipDict) throws IOException {
-		_numRows = in.readInt();
-		int numCols = in.readInt();
-		int numVals = in.readInt();
-
-		// read col indices
-		_colIndexes = new int[numCols];
-		for(int i = 0; i < numCols; i++)
-			_colIndexes[i] = in.readInt();
-
-		// read distinct values
-		if(!skipDict || numCols != 1) {
-			double[] values = new double[numVals * numCols];
-			for(int i = 0; i < numVals * numCols; i++)
-				values[i] = in.readDouble();
-			_dict = new Dictionary(values);
-		}
-
+		super.readFields(in);
 		// read data
 		_data = new byte[_numRows];
 		for(int i = 0; i < _numRows; i++)
@@ -191,20 +155,16 @@
 
 	@Override
 	public long getExactSizeOnDisk() {
-		long ret = 12; // header
-		// col indices
-		ret += 4 * _colIndexes.length;
-		// distinct values (groups of values)
-		ret += 8 * _dict.getValues().length;
+		long ret = super.getExactSizeOnDisk();
 		// data
-		ret += 1 * _data.length;
+		ret += _data.length;
 
 		return ret;
 	}
 
 	@Override
 	public long estimateInMemorySize() {
-		return ColGroupSizes.estimateInMemorySizeDDC1(getNumCols(), getNumValues(), _data.length);
+		return ColGroupSizes.estimateInMemorySizeDDC1(getNumCols(), getNumValues(), _data.length, isLossy());
 	}
 
 	@Override
@@ -311,117 +271,90 @@
 	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) {
 		double[] a = ColGroupConverter.getDenseVector(vector);
 		double[] c = result.getDenseBlockValues();
-		final int nrow = getNumRows();
+		// final int nrow = getNumRows();
 		final int numVals = getNumValues();
 
 		// iterative over codes and pre-aggregate inputs per code (guaranteed <=255)
 		// temporary array also avoids false sharing in multi-threaded environments
 		double[] vals = allocDVector(numVals, true);
-		for(int i = 0; i < nrow; i++) {
-			vals[_data[i] & 0xFF] += a[i];
+		for(int i = 0; i < _numRows; i++) {
+			int index = getIndex(i);
+			vals[index] += a[i];
 		}
 
 		// post-scaling of pre-aggregate with distinct values
 		postScaling(vals, c);
 	}
 
-	@Override
-	public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
-		double[] c = result.getDenseBlockValues();
-		final int nrow = getNumRows();
-		final int numVals = getNumValues();
+	// @Override
+	// public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
+	// 	double[] c = result.getDenseBlockValues();
+	// 	final int nrow = getNumRows();
+	// 	final int numVals = getNumValues();
+	// 	// final double[] dictionary = getValues();
 
-		// iterative over codes and pre-aggregate inputs per code (guaranteed <=255)
-		// temporary array also avoids false sharing in multi-threaded environments
-		double[] vals = allocDVector(numVals, true);
-		for(int i = 0; i < nrow; i++)
-			vals[_data[i] & 0xFF] += a.getData(i);
+	// 	// iterative over codes and pre-aggregate inputs per code (guaranteed <=255)
+	// 	// temporary array also avoids false sharing in multi-threaded environments
+	// 	double[] vals = allocDVector(numVals, true);
+	// 	double[] aDict = a.getValues();
+	// 	for(int i = 0; i < nrow; i++) {
+	// 		int rowIdA = a.getIndex(i);
+	// 		int rowIdThis = getIndex(i);
+	// 		vals[rowIdThis] += aDict[rowIdA];
+	// 	}
+	// 	// vals[_data[i] & 0xFF] += a.getData(i, dictionary);
 
-		// post-scaling of pre-aggregate with distinct values
-		postScaling(vals, c);
-	}
+	// 	// post-scaling of pre-aggregate with distinct values
+	// 	postScaling(vals, c);
+	// }
 
-	@Override
-	protected void computeSum(MatrixBlock result, KahanFunction kplus) {
-		final int ncol = getNumCols();
-		final int numVals = getNumValues();
-		final double[] values = getValues();
 
-		// iterative over codes and count per code (guaranteed <=255)
-		int[] counts = getCounts();
+	// public static void computeRowSums(ColGroupDDC1[] grps, MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+	// 	// note: due to corrections the output might be a large dense block
+	// 	DenseBlock c = result.getDenseBlock();
 
-		// post-scaling of pre-aggregate with distinct values
-		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
-		for(int k = 0, valOff = 0; k < numVals; k++, valOff += ncol) {
-			int cntk = counts[k];
-			for(int j = 0; j < ncol; j++)
-				kplus.execute3(kbuff, values[valOff + j], cntk);
-		}
+	// 	if(grps[0]._dict instanceof QDictionary && !(kplus instanceof KahanPlusSq)) {
 
-		result.quickSetValue(0, 0, kbuff._sum);
-		result.quickSetValue(0, 1, kbuff._correction);
-	}
 
-	@Override
-	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
-		// note: due to corrections the output might be a large dense block
-		DenseBlock c = result.getDenseBlock();
-		KahanObject kbuff = new KahanObject(0, 0);
-		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+	// 		return; // early return if needed.
+	// 	}
 
-		// pre-aggregate nnz per value tuple
-		double[] vals = sumAllValues(kplus, kbuff, false);
+	// 	KahanObject kbuff = new KahanObject(0, 0);
+	// 	KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
 
-		// scan data and add to result (use kahan plus not general KahanFunction
-		// for correctness in case of sqk+)
-		for(int i = rl; i < ru; i++) {
-			double[] cvals = c.values(i);
-			int cix = c.pos(i);
-			kbuff.set(cvals[cix], cvals[cix + 1]);
-			kplus2.execute2(kbuff, vals[_data[i] & 0xFF]);
-			cvals[cix] = kbuff._sum;
-			cvals[cix + 1] = kbuff._correction;
-		}
-	}
+	// 	// prepare distinct values once
+	// 	double[][] vals = new double[grps.length][];
+	// 	for(int i = 0; i < grps.length; i++) {
+	// 		// pre-aggregate all distinct values (guaranteed <=255)
+	// 		vals[i] = grps[i].sumAllValues(kplus, kbuff);
+	// 	}
 
-	public static void computeRowSums(ColGroupDDC1[] grps, MatrixBlock result, KahanFunction kplus, int rl, int ru) {
-		// note: due to corrections the output might be a large dense block
-		DenseBlock c = result.getDenseBlock();
-		KahanObject kbuff = new KahanObject(0, 0);
-		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
+	// 	// cache-conscious row sums operations
+	// 	// iterative over codes of all groups and add to output
+	// 	// (use kahan plus not general KahanFunction for correctness in case of sqk+)
+	// 	int blksz = 1024; // 16KB
+	// 	double[] tmpAgg = new double[blksz];
+	// 	for(int bi = rl; bi < ru; bi += blksz) {
+	// 		Arrays.fill(tmpAgg, 0);
+	// 		// aggregate all groups
+	// 		for(int j = 0; j < grps.length; j++) {
+	// 			double[] valsj = vals[j];
+	// 			byte[] dataj = grps[j]._data;
+	// 			for(int i = bi; i < Math.min(bi + blksz, ru); i++)
+	// 				tmpAgg[i - bi] += valsj[dataj[i] & 0xFF];
+	// 		}
+	// 		// add partial results of all ddc groups
+	// 		for(int i = bi; i < Math.min(bi + blksz, ru); i++) {
+	// 			double[] cvals = c.values(i);
+	// 			int cix = c.pos(i);
+	// 			kbuff.set(cvals[cix], cvals[cix + 1]);
+	// 			kplus2.execute2(kbuff, tmpAgg[i - bi]);
+	// 			cvals[cix] = kbuff._sum;
+	// 			cvals[cix + 1] = kbuff._correction;
+	// 		}
+	// 	}
 
-		// prepare distinct values once
-		double[][] vals = new double[grps.length][];
-		for(int i = 0; i < grps.length; i++) {
-			// pre-aggregate all distinct values (guaranteed <=255)
-			vals[i] = grps[i].sumAllValues(kplus, kbuff);
-		}
-
-		// cache-conscious row sums operations
-		// iterative over codes of all groups and add to output
-		// (use kahan plus not general KahanFunction for correctness in case of sqk+)
-		int blksz = 1024; // 16KB
-		double[] tmpAgg = new double[blksz];
-		for(int bi = rl; bi < ru; bi += blksz) {
-			Arrays.fill(tmpAgg, 0);
-			// aggregate all groups
-			for(int j = 0; j < grps.length; j++) {
-				double[] valsj = vals[j];
-				byte[] dataj = grps[j]._data;
-				for(int i = bi; i < Math.min(bi + blksz, ru); i++)
-					tmpAgg[i - bi] += valsj[dataj[i] & 0xFF];
-			}
-			// add partial results of all ddc groups
-			for(int i = bi; i < Math.min(bi + blksz, ru); i++) {
-				double[] cvals = c.values(i);
-				int cix = c.pos(i);
-				kbuff.set(cvals[cix], cvals[cix + 1]);
-				kplus2.execute2(kbuff, tmpAgg[i - bi]);
-				cvals[cix] = kbuff._sum;
-				cvals[cix + 1] = kbuff._correction;
-			}
-		}
-	}
+	// }
 
 	@Override
 	public ColGroup scalarOperation(ScalarOperator op) {
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC2.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC2.java
index a0218a1..b3d9fc7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC2.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC2.java
@@ -24,11 +24,8 @@
 import java.io.IOException;
 import java.util.Arrays;
 
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
-import org.apache.sysds.runtime.data.DenseBlock;
-import org.apache.sysds.runtime.functionobjects.KahanFunction;
-import org.apache.sysds.runtime.functionobjects.KahanPlus;
-import org.apache.sysds.runtime.instructions.cp.KahanObject;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 
@@ -39,16 +36,14 @@
 public class ColGroupDDC2 extends ColGroupDDC {
 	private static final long serialVersionUID = -3995768285207071013L;
 
-	private static final int MAX_TMP_VALS = 32 * 1024;
-
 	private char[] _data;
 
 	protected ColGroupDDC2() {
 		super();
 	}
 
-	protected ColGroupDDC2(int[] colIndices, int numRows, UncompressedBitmap ubm) {
-		super(colIndices, numRows, ubm);
+	protected ColGroupDDC2(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
+		super(colIndices, numRows, ubm, cs);
 		_data = new char[numRows];
 
 		int numVals = ubm.getNumValues();
@@ -59,8 +54,7 @@
 			int zeroIx = containsAllZeroValue();
 			if(zeroIx < 0) {
 				zeroIx = numVals;
-				double[] values = _dict.getValues();
-				_dict = new Dictionary(Arrays.copyOf(values, values.length + numCols));
+				_dict = IDictionary.materializeZeroValue(_dict, numCols);
 			}
 			Arrays.fill(_data, (char) zeroIx);
 		}
@@ -81,7 +75,7 @@
 	}
 
 	@Override
-	protected ColGroupType getColGroupType(){
+	protected ColGroupType getColGroupType() {
 		return ColGroupType.DDC1;
 	}
 
@@ -98,12 +92,22 @@
 	}
 
 	@Override
-	protected double getData(int r) {
+	protected int getIndex(int r){
+		return _data[r];
+	}
+	
+	@Override
+	protected int getIndex(int r, int colIx){
+		return _data[r]  * getNumCols() + colIx;
+	}
+
+	@Override
+	protected double getData(int r, double[] dictionary) {
 		return _dict.getValue(_data[r]);
 	}
 
 	@Override
-	protected double getData(int r, int colIx) {
+	protected double getData(int r, int colIx,  double[] dictionary) {
 		return _dict.getValue(_data[r] * getNumCols() + colIx);
 	}
 
@@ -119,43 +123,16 @@
 
 	@Override
 	public void write(DataOutput out) throws IOException {
-		int numCols = getNumCols();
-		int numVals = getNumValues();
-		out.writeInt(_numRows);
-		out.writeInt(numCols);
-		out.writeInt(numVals);
-
-		// write col indices
-		for(int i = 0; i < _colIndexes.length; i++)
-			out.writeInt(_colIndexes[i]);
-
-		// write distinct values
-		double[] values = getValues();
-		for(int i = 0; i < values.length; i++)
-			out.writeDouble(values[i]);
-
+		super.write(out);
 		// write data
+		// out.writeInt(_data.length);
 		for(int i = 0; i < _numRows; i++)
 			out.writeChar(_data[i]);
 	}
 
 	@Override
 	public void readFields(DataInput in) throws IOException {
-		_numRows = in.readInt();
-		int numCols = in.readInt();
-		int numVals = in.readInt();
-
-		// read col indices
-		_colIndexes = new int[numCols];
-		for(int i = 0; i < numCols; i++)
-			_colIndexes[i] = in.readInt();
-
-		// read distinct values
-		double[] values = new double[numVals * numCols];
-		for(int i = 0; i < numVals * numCols; i++)
-			values[i] = in.readDouble();
-		_dict = new Dictionary(values);
-		
+		super.readFields(in);
 		// read data
 		_data = new char[_numRows];
 		for(int i = 0; i < _numRows; i++)
@@ -164,11 +141,7 @@
 
 	@Override
 	public long getExactSizeOnDisk() {
-		long ret = 12; // header
-		// col indices
-		ret += 4 * _colIndexes.length;
-		// distinct values (groups of values)
-		ret += 8 * getValues().length;
+		long ret = super.getExactSizeOnDisk();
 		// data
 		ret += 2 * _data.length;
 
@@ -178,7 +151,7 @@
 	@Override
 	public long estimateInMemorySize() {
 		// LOG.debug(this.toString());
-		return ColGroupSizes.estimateInMemorySizeDDC2(getNumCols(), getNumValues(), _data.length);
+		return ColGroupSizes.estimateInMemorySizeDDC2(getNumCols(), getNumValues(), _data.length, isLossy());
 	}
 
 	@Override
@@ -287,85 +260,59 @@
 		}
 	}
 
-	@Override
-	public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
-		double[] c = result.getDenseBlockValues();
-		final int nrow = getNumRows();
-		final int ncol = getNumCols();
-		final int numVals = getNumValues();
+	// @Override
+	// public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
+	// 	double[] c = result.getDenseBlockValues();
+	// 	final int nrow = getNumRows();
+	// 	final int ncol = getNumCols();
+	// 	final int numVals = getNumValues();
+	// 	final double[] dictionary = getValues();
 
-		if(8 * numVals < getNumRows()) {
-			// iterative over codes and pre-aggregate inputs per code
-			// temporary array also avoids false sharing in multi-threaded environments
-			double[] vals = allocDVector(numVals, true);
-			for(int i = 0; i < nrow; i++) {
-				vals[_data[i]] += a.getData(i);
-			}
+	// 	if(8 * numVals < getNumRows()) {
+	// 		// iterative over codes and pre-aggregate inputs per code
+	// 		// temporary array also avoids false sharing in multi-threaded environments
+	// 		double[] vals = allocDVector(numVals, true);
+	// 		for(int i = 0; i < nrow; i++) {
+	// 			vals[_data[i]] += a.getData(i, dictionary);
+	// 		}
 
-			// post-scaling of pre-aggregate with distinct values
-			postScaling(vals, c);
-		}
-		else // general case
-		{
-			// iterate over codes, compute all, and add to the result
-			double[] values = getValues();
-			for(int i = 0; i < nrow; i++) {
-				double aval = a.getData(i, 0);
-				if(aval != 0)
-					for(int j = 0, valOff = _data[i] * ncol; j < ncol; j++)
-						c[_colIndexes[j]] += aval * values[valOff + j];
-			}
-		}
-	}
+	// 		// post-scaling of pre-aggregate with distinct values
+	// 		postScaling(vals, c);
+	// 	}
+	// 	else // general case
+	// 	{
+	// 		// iterate over codes, compute all, and add to the result
+	// 		double[] values = getValues();
+	// 		for(int i = 0; i < nrow; i++) {
+	// 			double aval = a.getData(i, 0, dictionary);
+	// 			if(aval != 0)
+	// 				for(int j = 0, valOff = _data[i] * ncol; j < ncol; j++)
+	// 					c[_colIndexes[j]] += aval * values[valOff + j];
+	// 		}
+	// 	}
+	// }
 
-	@Override
-	protected void computeSum(MatrixBlock result, KahanFunction kplus) {
-		final int ncol = getNumCols();
-		final int numVals = getNumValues();
+	// @Override
+	// protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
+	// 	// note: due to corrections the output might be a large dense block
+	// 	DenseBlock c = result.getDenseBlock();
+	// 	KahanObject kbuff = new KahanObject(0, 0);
+	// 	KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
 
-		if(numVals < MAX_TMP_VALS) {
-			// iterative over codes and count per code
-			int[] counts = getCounts();
-			double[] values = getValues();
+	// 	// pre-aggregate nnz per value tuple
+	// 	double[] vals = sumAllValues(kplus, kbuff, false);
 
-			// post-scaling of pre-aggregate with distinct values
-			KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
-			for(int k = 0, valOff = 0; k < numVals; k++, valOff += ncol) {
-				int cntk = counts[k];
-				for(int j = 0; j < ncol; j++)
-					kplus.execute3(kbuff, values[valOff + j], cntk);
-			}
-
-			result.quickSetValue(0, 0, kbuff._sum);
-			result.quickSetValue(0, 1, kbuff._correction);
-		}
-		else // general case
-		{
-			super.computeSum(result, kplus);
-		}
-	}
-
-	@Override
-	protected void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
-		// note: due to corrections the output might be a large dense block
-		DenseBlock c = result.getDenseBlock();
-		KahanObject kbuff = new KahanObject(0, 0);
-		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
-
-		// pre-aggregate nnz per value tuple
-		double[] vals = sumAllValues(kplus, kbuff, false);
-
-		// scan data and add to result (use kahan plus not general KahanFunction
-		// for correctness in case of sqk+)
-		for(int i = rl; i < ru; i++) {
-			double[] cvals = c.values(i);
-			int cix = c.pos(i);
-			kbuff.set(cvals[cix], cvals[cix + 1]);
-			kplus2.execute2(kbuff, vals[_data[i]]);
-			cvals[cix] = kbuff._sum;
-			cvals[cix + 1] = kbuff._correction;
-		}
-	}
+	// 	// scan data and add to result (use kahan plus not general KahanFunction
+	// 	// for correctness in case of sqk+)
+	// 	for(int i = rl; i < ru; i++) {
+	// 		double[] cvals = c.values(i);
+	// 		int cix = c.pos(i);
+	// 		kbuff.set(cvals[cix], cvals[cix + 1]);
+	// 		kplus2.execute2(kbuff, vals[_data[i]]);
+	// 		cvals[cix] = kbuff._sum;
+	// 		cvals[cix + 1] = kbuff._correction;
+	// 	}
+	// }
 
 	@Override
 	public ColGroup scalarOperation(ScalarOperator op) {
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
index e33cb3e..3472c1d 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
@@ -25,6 +25,7 @@
 import java.util.List;
 import java.util.PriorityQueue;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 
@@ -32,11 +33,11 @@
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.compress.BitmapEncoder;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimatorExact;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.util.CommonThreadPool;
 
@@ -58,25 +59,26 @@
 	 */
 	public static ColGroup[] compressColGroups(MatrixBlock in, HashMap<Integer, Double> compRatios, List<int[]> groups,
 		CompressionSettings compSettings, int k) {
-
-		if(k == 1) {
-			compressColGroups(in, compRatios, groups, compSettings);
+		if(k <= 1) {
+			return compressColGroups(in, compRatios, groups, compSettings);
 		}
-
-		try {
-			ExecutorService pool = CommonThreadPool.get(k);
-			ArrayList<CompressTask> tasks = new ArrayList<>();
-			for(int[] colIndexes : groups)
-				tasks.add(new CompressTask(in, compRatios, colIndexes, compSettings));
-			List<Future<ColGroup>> rtask = pool.invokeAll(tasks);
-			ArrayList<ColGroup> ret = new ArrayList<>();
-			for(Future<ColGroup> lrtask : rtask)
-				ret.add(lrtask.get());
-			pool.shutdown();
-			return ret.toArray(new ColGroup[0]);
-		}
-		catch(Exception ex) {
-			throw new DMLRuntimeException(ex);
+		else {
+			try {
+				ExecutorService pool = CommonThreadPool.get(k);
+				ArrayList<CompressTask> tasks = new ArrayList<>();
+				for(int[] colIndexes : groups)
+					tasks.add(new CompressTask(in, compRatios, colIndexes, compSettings));
+				List<Future<ColGroup>> rtask = pool.invokeAll(tasks);
+				ArrayList<ColGroup> ret = new ArrayList<>();
+				for(Future<ColGroup> lrtask : rtask)
+					ret.add(lrtask.get());
+				pool.shutdown();
+				return ret.toArray(new ColGroup[0]);
+			}
+			catch(InterruptedException | ExecutionException e) {
+				// If there is an error in the parallel execution default to the non parallel implementation
+				return compressColGroups(in, compRatios, groups, compSettings);
+			}
 		}
 	}
 
@@ -144,14 +146,10 @@
 		CompressedSizeInfoColGroup sizeInfo;
 		// The compression type is decided based on a full bitmap since it
 		// will be reused for the actual compression step.
-		UncompressedBitmap ubm = null;
+		AbstractBitmap ubm = null;
 		PriorityQueue<CompressedColumn> compRatioPQ = CompressedColumn.makePriorityQue(compRatios, colIndexes);
 
-		// TODO: Use sample based estimator still here.
 		// Switching to exact estimator here, when doing the actual compression.
-		// FYI, this was also how it was doing it before, under the covers.
-		// This is because the ubm is extracted for the entire column, (because it is going to be used for the later
-		// compression i guess)
 		CompressedSizeEstimator estimator = new CompressedSizeEstimatorExact(in, compSettings);
 
 		while(true) {
@@ -210,32 +208,31 @@
 	 * 
 	 * @param colIndexes     The Column indexes to compress
 	 * @param rlen           The number of rows in the columns
-	 * @param ubm            The uncompressedBitmap containing all the data needed for the compression (unless
-	 *                       Uncompressed ColGroup)
+	 * @param ubm            The Bitmap containing all the data needed for the compression (unless Uncompressed
+	 *                       ColGroup)
 	 * @param compType       The CompressionType selected
-	 * @param compSettings   The compression Settings used for the given compression
+	 * @param cs             The compression Settings used for the given compression
 	 * @param rawMatrixBlock The copy of the original input (maybe transposed) MatrixBlock
 	 * @return A Compressed ColGroup
 	 */
-	public static ColGroup compress(int[] colIndexes, int rlen, UncompressedBitmap ubm, CompressionType compType,
-		CompressionSettings compSettings, MatrixBlock rawMatrixBlock) {
-
+	public static ColGroup compress(int[] colIndexes, int rlen, AbstractBitmap ubm, CompressionType compType,
+		CompressionSettings cs, MatrixBlock rawMatrixBlock) {
 		switch(compType) {
 			case DDC:
 				if(ubm.getNumValues() < 256) {
-					return new ColGroupDDC1(colIndexes, rlen, ubm);
+					return new ColGroupDDC1(colIndexes, rlen, ubm, cs);
 				}
 				else {
-					return new ColGroupDDC2(colIndexes, rlen, ubm);
+					return new ColGroupDDC2(colIndexes, rlen, ubm, cs);
 				}
 			case RLE:
-				return new ColGroupRLE(colIndexes, rlen, ubm);
+				return new ColGroupRLE(colIndexes, rlen, ubm, cs);
 			case OLE:
-				return new ColGroupOLE(colIndexes, rlen, ubm);
+				return new ColGroupOLE(colIndexes, rlen, ubm, cs);
 			case UNCOMPRESSED:
-				return new ColGroupUncompressed(colIndexes, rawMatrixBlock, compSettings);
-			case QUAN:
-				return new ColGroupQuan(colIndexes, rlen, ubm);
+				return new ColGroupUncompressed(colIndexes, rawMatrixBlock, cs);
+			// case QUAN:
+				// return new ColGroupQuan(colIndexes, rlen, ubm);
 			default:
 				throw new DMLCompressionException("Not implemented ColGroup Type compressed in factory.");
 		}
@@ -248,9 +245,9 @@
 	 * TODO Redesign this method such that it does not utilize the null pointers to decide on which ColGroups should be
 	 * incompressable. This is done by changing both this method and compressColGroup inside this class.
 	 * 
-	 * @param numCols The number of columns in input matrix
-	 * @param colGroups The colgroups made to assign
-	 * @param rawBlock The (maybe transposed) original MatrixBlock
+	 * @param numCols      The number of columns in input matrix
+	 * @param colGroups    The colgroups made to assign
+	 * @param rawBlock     The (maybe transposed) original MatrixBlock
 	 * @param compSettings The Compressionsettings used.
 	 * @return return the final ColGroupList.
 	 */
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java
index f72e307..03e78d7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupIO.java
@@ -25,6 +25,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.ColGroupType;
 
@@ -34,6 +36,8 @@
  */
 public class ColGroupIO {
 
+	protected static final Log LOG = LogFactory.getLog(ColGroupIO.class.getName());
+
 	/**
 	 * Read groups from a file. Note that the information about how many should be in the file already.
 	 * 
@@ -46,14 +50,15 @@
 
 		// Read in how many colGroups there are
 		int nColGroups = in.readInt();
-
+		LOG.debug("reading " + nColGroups + " ColGroups");
 		// Allocate that amount into an ArrayList
 		List<ColGroup> _colGroups = new ArrayList<>(nColGroups);
-		double[] sharedDict = null;
+		// double[] sharedDict = null;
 
 		// Read each ColGroup one at a time.
 		for(int i = 0; i < nColGroups; i++) {
 			ColGroupType ctype = ColGroupType.values()[in.readByte()];
+			LOG.debug(ctype);
 			ColGroup grp = null;
 
 			// create instance of column group
@@ -73,24 +78,24 @@
 				case DDC2:
 					grp = new ColGroupDDC2();
 					break;
-				case QUAN8S:
-					grp = new ColGroupQuan();
-					break;
+				// case QUAN8S:
+					// grp = new ColGroupQuan();
+					// break;
 				default:
-					throw new DMLRuntimeException("Unsupported ColGroup Type used:  "  + ctype);
+					throw new DMLRuntimeException("Unsupported ColGroup Type used:  " + ctype);
 			}
 
 			// Deserialize and add column group (flag for shared dictionary passed
 			// and numCols evaluated in DDC1 because numCols not available yet
-			grp.readFields(in, sharedDict != null);
+			grp.readFields(in);
 
 			// use shared DDC1 dictionary if applicable
-			if(_sharedDDC1Dict && grp.getNumCols() == 1 && grp instanceof ColGroupDDC1) {
-				if(sharedDict == null)
-					sharedDict = ((ColGroupValue) grp).getValues();
-				else
-					((ColGroupValue) grp).setValues(sharedDict);
-			}
+			// if(_sharedDDC1Dict && grp.getNumCols() == 1 && grp instanceof ColGroupDDC1) {
+			// 	if(sharedDict == null)
+			// 		sharedDict = ((ColGroupValue) grp).getValues();
+			// 	else
+			// 		((ColGroupValue) grp).setValues(sharedDict);
+			// }
 
 			_colGroups.add(grp);
 		}
@@ -107,18 +112,18 @@
 	 * @throws IOException Throws IO Exception if the out refuses to write.
 	 */
 	public static void writeGroups(DataOutput out, boolean _sharedDDC1Dict, List<ColGroup> _colGroups)
-		throws IOException
-	{
+		throws IOException {
 		// Write out how many ColGroups we save.
 		out.writeInt(_colGroups.size());
 
-		boolean skipDict = false;
+		// boolean skipDict = false;
 		for(ColGroup grp : _colGroups) {
 			// TODO save DDC Dict sharing smarter.
-			boolean shared = (grp instanceof ColGroupDDC1 && _sharedDDC1Dict && grp.getNumCols() == 1);
+			// boolean shared = false;// (grp instanceof ColGroupDDC1 && _sharedDDC1Dict && grp.getNumCols() == 1);
 			out.writeByte(grp.getColGroupType().ordinal());
-			grp.write(out, skipDict & shared); 
-			skipDict |= shared;
+			// grp.write(out, skipDict & shared);
+			grp.write(out);
+			// skipDict |= shared;
 		}
 	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
index fe07b18..aa3d871 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
@@ -19,18 +19,22 @@
 
 package org.apache.sysds.runtime.compress.colgroup;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Iterator;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.compress.BitmapEncoder;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils;
 import org.apache.sysds.runtime.data.DenseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.KahanFunction;
 import org.apache.sysds.runtime.functionobjects.KahanPlus;
+import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
 import org.apache.sysds.runtime.instructions.cp.KahanObject;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
@@ -44,7 +48,7 @@
 
 	private static final Log LOG = LogFactory.getLog(ColGroupOLE.class.getName());
 
-	protected int[] _skiplist;
+	protected int[] _skipList;
 
 	protected ColGroupOLE() {
 		super();
@@ -56,28 +60,28 @@
 	 * @param colIndices indices (within the block) of the columns included in this column
 	 * @param numRows    total number of rows in the parent block
 	 * @param ubm        Uncompressed bitmap representation of the block
+	 * @param cs         The Compression settings used for compression
 	 */
-	protected ColGroupOLE(int[] colIndices, int numRows, UncompressedBitmap ubm) {
-		super(colIndices, numRows, ubm);
+	protected ColGroupOLE(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
+		super(colIndices, numRows, ubm, cs);
 
 		// compress the bitmaps
 		final int numVals = ubm.getNumValues();
 		char[][] lbitmaps = new char[numVals][];
 		int totalLen = 0;
 		for(int i = 0; i < numVals; i++) {
-			lbitmaps[i] = BitmapEncoder.genOffsetBitmap(ubm.getOffsetsList(i).extractValues(), ubm.getNumOffsets(i));
+			lbitmaps[i] = genOffsetBitmap(ubm.getOffsetsList(i).extractValues(), ubm.getNumOffsets(i));
 			totalLen += lbitmaps[i].length;
 		}
 
 		// compact bitmaps to linearized representation
 		createCompressedBitmaps(numVals, totalLen, lbitmaps);
 
-		// TODO FIX Skiplist construction Since it is not needed in all cases.
-
-		_skiplist = new int[numVals];
-		if( CREATE_SKIP_LIST && numRows > 2 * BitmapEncoder.BITMAP_BLOCK_SZ) {
-			int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
-			// _skiplist = new int[numVals];
+		_skipList = null;
+		if(CREATE_SKIP_LIST && numRows > 2 * CompressionSettings.BITMAP_BLOCK_SZ) {
+			_skipList = new int[numVals];
+			int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
+			// _skipList = new int[numVals];
 			int rl = (getNumRows() / 2 / blksz) * blksz;
 			for(int k = 0; k < numVals; k++) {
 				int boff = _ptr[k];
@@ -86,7 +90,7 @@
 				for(int i = 0; i < rl && bix < blen; i += blksz) {
 					bix += _data[boff + bix] + 1;
 				}
-				_skiplist[k] = bix;
+				_skipList[k] = bix;
 			}
 		}
 
@@ -109,14 +113,14 @@
 	}
 
 	@Override
-	protected ColGroupType getColGroupType(){
+	protected ColGroupType getColGroupType() {
 		return ColGroupType.OLE;
 	}
 
 	@Override
 	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
-		if( getNumValues() > 1) {
-			final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		if(getNumValues() > 1) {
+			final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 			final int numCols = getNumCols();
 			final int numVals = getNumValues();
 			final double[] values = getValues();
@@ -150,8 +154,8 @@
 
 	@Override
 	public void decompressToBlock(MatrixBlock target, int[] colixTargets) {
-		if( getNumValues() > 1) {
-			final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		if(getNumValues() > 1) {
+			final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 			final int numCols = getNumCols();
 			final int numVals = getNumValues();
 			final int n = getNumRows();
@@ -191,7 +195,7 @@
 
 	@Override
 	public void decompressToBlock(MatrixBlock target, int colpos) {
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numCols = getNumCols();
 		final int numVals = getNumValues();
 		final int n = getNumRows();
@@ -241,7 +245,7 @@
 
 	@Override
 	public int[] getCounts(int rl, int ru, int[] counts) {
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numVals = getNumValues();
 		Arrays.fill(counts, 0, numVals, 0);
 		for(int k = 0; k < numVals; k++) {
@@ -261,7 +265,7 @@
 		// LOG.debug(this.toString());
 		// Note 0 is because the size can be calculated based on the given values,
 		// And because the fourth argument is only needed in estimation, not when an OLE ColGroup is created.
-		return ColGroupSizes.estimateInMemorySizeOLE(getNumCols(), getValues().length, _data.length, 0);
+		return ColGroupSizes.estimateInMemorySizeOLE(getNumCols(), getValues().length, _data.length, 0, isLossy());
 	}
 
 	@Override
@@ -283,7 +287,7 @@
 		}
 
 		double[] rvalues = applyScalarOp(op, val0, getNumCols());
-		char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff, loff.length);
+		char[] lbitmap = genOffsetBitmap(loff, loff.length);
 		char[] rbitmaps = Arrays.copyOf(_data, _data.length + lbitmap.length);
 		System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length);
 		int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length + 1);
@@ -296,7 +300,7 @@
 	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) {
 		double[] b = ColGroupConverter.getDenseVector(vector);
 		double[] c = result.getDenseBlockValues();
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numCols = getNumCols();
 		final int numVals = getNumValues();
 
@@ -306,13 +310,13 @@
 			sb[j] = b[_colIndexes[j]];
 		}
 
-		if( numVals > 1 && _numRows > blksz) {
+		if(numVals > 1 && _numRows > blksz) {
 			// since single segment scans already exceed typical L2 cache sizes
 			// and because there is some overhead associated with blocking, the
 			// best configuration aligns with L3 cache size (x*vcores*64K*8B < L3)
 			// x=4 leads to a good yet slightly conservative compromise for single-/
 			// multi-threaded and typical number of cores and L3 cache sizes
-			final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ;
+			final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ * 2;
 
 			// step 1: prepare position and value arrays
 			int[] apos = skipScan(numVals, rl);
@@ -380,15 +384,15 @@
 	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) {
 		double[] a = ColGroupConverter.getDenseVector(vector);
 		double[] c = result.getDenseBlockValues();
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numCols = getNumCols();
 		final int numVals = getNumValues();
 		final int n = getNumRows();
 		final double[] values = getValues();
 
-		if( numVals > 1 && _numRows > blksz) {
+		if(numVals > 1 && _numRows > blksz) {
 			// cache blocking config (see matrix-vector mult for explanation)
-			final int blksz2 = ColGroupOffset.READ_CACHE_BLKSZ;
+			final int blksz2 = 2 * CompressionSettings.BITMAP_BLOCK_SZ;
 
 			// step 1: prepare position and value arrays
 
@@ -445,72 +449,98 @@
 		}
 	}
 
-	@Override
-	public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
-		// note: this method is only applicable for numrows < blocksize
-		double[] c = result.getDenseBlockValues();
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		final double[] values = getValues();
+	// @Override
+	// public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
+	// 	// note: this method is only applicable for numrows < blocksize
+	// 	double[] c = result.getDenseBlockValues();
+	// 	final int numCols = getNumCols();
+	// 	final int numVals = getNumValues();
+	// 	final double[] values = getValues();
+	// 	final double[] aValues = a.getValues();
 
-		// iterate over all values and their bitmaps
-		for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) {
-			int boff = _ptr[k];
+	// 	// iterate over all values and their bitmaps
+	// 	for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) {
+	// 		int boff = _ptr[k];
 
-			// iterate over bitmap blocks and add partial results
-			double vsum = 0;
-			for(int j = boff + 1; j < boff + 1 + _data[boff]; j++)
-				vsum += a.getData(_data[j]);
+	// 		// iterate over bitmap blocks and add partial results
+	// 		double vsum = 0;
+	// 		for(int j = boff + 1; j < boff + 1 + _data[boff]; j++)
+	// 			vsum += aValues[a.getIndex(_data[j])];
 
-			// scale partial results by values and write results
-			for(int j = 0; j < numCols; j++)
-				c[_colIndexes[j]] += vsum * values[valOff + j];
-		}
-	}
+	// 		// scale partial results by values and write results
+	// 		for(int j = 0; j < numCols; j++)
+	// 			c[_colIndexes[j]] += vsum * values[valOff + j];
+	// 	}
+	// }
 
 	@Override
 	protected final void computeSum(MatrixBlock result, KahanFunction kplus) {
-		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
 
 		// iterate over all values and their bitmaps
 		final int numVals = getNumValues();
 		final int numCols = getNumCols();
-		final double[] values = getValues();
 
-		for(int k = 0; k < numVals; k++) {
-			int boff = _ptr[k];
-			int blen = len(k);
-			int valOff = k * numCols;
+		if(_dict instanceof QDictionary && !(kplus instanceof KahanPlusSq)) {
+			final QDictionary values = ((QDictionary) _dict);
+			long sum = 0;
+			for(int k = 0; k < numVals; k++) {
+				int boff = _ptr[k];
+				int blen = len(k);
+				int valOff = k * numCols;
 
-			// iterate over bitmap blocks and count partial lengths
-			int count = 0;
-			for(int bix = 0; bix < blen; bix += _data[boff + bix] + 1)
-				count += _data[boff + bix];
+				// iterate over bitmap blocks and count partial lengths
+				int count = 0;
+				for(int bix = 0; bix < blen; bix += _data[boff + bix] + 1)
+					count += _data[boff + bix];
 
-			// scale counts by all values
-			for(int j = 0; j < numCols; j++)
-				kplus.execute3(kbuff, values[valOff + j], count);
+				// scale counts by all values
+				for(int j = 0; j < numCols; j++)
+					sum += values.getValueByte(valOff + j) * count;
+			}
+			result.quickSetValue(0, 0, result.quickGetValue(0, 0) + sum * values._scale);
+			result.quickSetValue(0, 1, 0);
 		}
+		else {
+			KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
 
-		result.quickSetValue(0, 0, kbuff._sum);
-		result.quickSetValue(0, 1, kbuff._correction);
+			final double[] values = getValues();
+
+			for(int k = 0; k < numVals; k++) {
+				int boff = _ptr[k];
+				int blen = len(k);
+				int valOff = k * numCols;
+
+				// iterate over bitmap blocks and count partial lengths
+				int count = 0;
+				for(int bix = 0; bix < blen; bix += _data[boff + bix] + 1)
+					count += _data[boff + bix];
+
+				// scale counts by all values
+				for(int j = 0; j < numCols; j++)
+					kplus.execute3(kbuff, values[valOff + j], count);
+			}
+
+			result.quickSetValue(0, 0, kbuff._sum);
+			result.quickSetValue(0, 1, kbuff._correction);
+		}
 	}
 
 	@Override
 	protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
 		// note: due to corrections the output might be a large dense block
 		DenseBlock c = result.getDenseBlock();
+
 		KahanObject kbuff = new KahanObject(0, 0);
 		KahanPlus kplus2 = KahanPlus.getKahanPlusFnObject();
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numVals = getNumValues();
 
-		if(ALLOW_CACHE_CONSCIOUS_ROWSUMS && numVals > 1 && _numRows > blksz) {
-			final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
+		if(numVals > 1 && _numRows > blksz) {
+			final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ;
 
 			// step 1: prepare position and value arrays
 			int[] apos = skipScan(numVals, rl);
-			double[] aval = sumAllValues(kplus, kbuff, false);
+			double[] aval = _dict.sumAllRowsToDouble(kplus, kbuff, _colIndexes.length,false);
 
 			// step 2: cache conscious row sums via horizontal scans
 			for(int bi = rl; bi < ru; bi += blksz2) {
@@ -533,10 +563,12 @@
 							int rix = ii + _data[pos + i];
 							double[] cvals = c.values(rix);
 							int cix = c.pos(rix);
+
 							kbuff.set(cvals[cix], cvals[cix + 1]);
 							kplus2.execute2(kbuff, val);
 							cvals[cix] = kbuff._sum;
 							cvals[cix + 1] = kbuff._correction;
+
 						}
 						bix += len + 1;
 					}
@@ -545,13 +577,15 @@
 				}
 			}
 		}
-		else {
+		else
+
+		{
 			// iterate over all values and their bitmaps
 			for(int k = 0; k < numVals; k++) {
 				// prepare value-to-add for entire value bitmap
 				int boff = _ptr[k];
 				int blen = len(k);
-				double val = sumValues(k, kplus, kbuff);
+				double val = _dict.sumRow(k, kplus, kbuff, _colIndexes.length);
 
 				// iterate over bitmap blocks and add values
 				if(val != 0) {
@@ -606,7 +640,7 @@
 	@Override
 	protected final void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
 		// NOTE: zeros handled once for all column groups outside
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numVals = getNumValues();
 		double[] c = result.getDenseBlockValues();
 
@@ -638,7 +672,7 @@
 	@Override
 	protected boolean[] computeZeroIndicatorVector() {
 		boolean[] ret = new boolean[_numRows];
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int numVals = getNumValues();
 
 		// initialize everything with zero
@@ -666,8 +700,8 @@
 
 	@Override
 	public void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
-		final int blksz2 = ColGroupOffset.WRITE_CACHE_BLKSZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
+		final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ * 2;
 		final int numVals = getNumValues();
 		final int numCols = getNumCols();
 
@@ -711,7 +745,7 @@
 	 */
 	private int[] skipScan(int numVals, int rl) {
 		int[] ret = allocIVector(numVals, rl == 0);
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 
 		if(rl > 0) { // rl aligned with blksz
 			int rskip = (getNumRows() / 2 / blksz) * blksz;
@@ -720,7 +754,7 @@
 				int boff = _ptr[k];
 				int blen = len(k);
 				int start = (rl >= rskip) ? rskip : 0;
-				int bix = (rl >= rskip) ? _skiplist[k] : 0;
+				int bix = (rl >= rskip) ? _skipList[k] : 0;
 				for(int i = start; i < rl && bix < blen; i += blksz) {
 					bix += _data[boff + bix] + 1;
 				}
@@ -732,14 +766,14 @@
 	}
 
 	private int skipScanVal(int k, int rl) {
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 
 		if(rl > 0) { // rl aligned with blksz
 			int rskip = (getNumRows() / 2 / blksz) * blksz;
 			int boff = _ptr[k];
 			int blen = len(k);
 			int start = (rl >= rskip) ? rskip : 0;
-			int bix = (rl >= rskip) ? _skiplist[k] : 0;
+			int bix = (rl >= rskip) ? _skipList[k] : 0;
 			for(int i = start; i < rl && bix < blen; i += blksz) {
 				bix += _data[boff + bix] + 1;
 			}
@@ -750,6 +784,48 @@
 	}
 
 	@Override
+	public void readFields(DataInput in) throws IOException {
+		super.readFields(in);
+		boolean skiplistNull = in.readBoolean();
+		if(!skiplistNull) {
+			_skipList = new int[in.readInt()];
+			for(int i = 0; i < _skipList.length; i++) {
+				_skipList[i] = in.readInt();
+			}
+		}
+		else {
+			_skipList = null;
+		}
+
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		super.write(out);
+		if(_skipList != null) {
+			out.writeBoolean(false);
+			out.writeInt(_skipList.length);
+			for(int i = 0; i < _skipList.length; i++) {
+				out.writeInt(_skipList[i]);
+			}
+		}
+		else {
+			out.writeBoolean(true);
+		}
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = super.getExactSizeOnDisk();
+		ret += 1; // in case skip list is null.
+		if(_skipList != null) {
+			ret += 4; // skiplist length
+			ret += 4 * _skipList.length;
+		}
+		return ret;
+	}
+
+	@Override
 	public Iterator<Integer> getIterator(int k) {
 		return new OLEValueIterator(k, 0, getNumRows());
 	}
@@ -768,8 +844,14 @@
 	public String toString() {
 		StringBuilder sb = new StringBuilder();
 		sb.append(super.toString());
-		sb.append(String.format("\n%15s%5d ", "SkipList:", this._skiplist.length));
-		sb.append(Arrays.toString(this._skiplist));
+		if(_skipList != null) {
+			sb.append(String.format("\n%15s%5d ", "SkipList:", this._skipList.length));
+			sb.append(Arrays.toString(this._skipList));
+		}
+		else {
+			sb.append("skiplist empty");
+		}
+
 		return sb.toString();
 	}
 
@@ -789,7 +871,7 @@
 			_blen = len(k);
 
 			// initialize position via segment-aligned skip-scan
-			int lrl = rl - rl % BitmapEncoder.BITMAP_BLOCK_SZ;
+			int lrl = rl - rl % CompressionSettings.BITMAP_BLOCK_SZ;
 			_bix = skipScanVal(k, lrl);
 			_start = lrl;
 
@@ -826,7 +908,7 @@
 				_rpos = _start + _data[_boff + _bix + _spos + 1];
 			}
 			else {
-				_start += BitmapEncoder.BITMAP_BLOCK_SZ;
+				_start += CompressionSettings.BITMAP_BLOCK_SZ;
 				_bix += _slen + 1;
 				if(_bix < _blen) {
 					_slen = _data[_boff + _bix];
@@ -846,7 +928,7 @@
 
 		public OLERowIterator(int rl, int ru) {
 			_apos = skipScan(getNumValues(), rl);
-			_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru - rl)];
+			_vcodes = new int[Math.min(CompressionSettings.BITMAP_BLOCK_SZ, ru - rl)];
 			Arrays.fill(_vcodes, -1); // initial reset
 			getNextSegment();
 		}
@@ -863,7 +945,7 @@
 				// reset vcode to avoid scan on next segment
 				_vcodes[segIx] = -1;
 			}
-			if(segIx + 1 == BitmapEncoder.BITMAP_BLOCK_SZ && !last)
+			if(segIx + 1 == CompressionSettings.BITMAP_BLOCK_SZ && !last)
 				getNextSegment();
 		}
 
@@ -884,4 +966,55 @@
 			}
 		}
 	}
+
+	/**
+	 * Encodes the bitmap in blocks of offsets. Within each block, the bits are stored as absolute offsets from the
+	 * start of the block.
+	 * 
+	 * @param offsets uncompressed offset list
+	 * @param len     logical length of the given offset list
+	 * 
+	 * @return compressed version of said bitmap
+	 */
+	public static char[] genOffsetBitmap(int[] offsets, int len) {
+		int lastOffset = offsets[len - 1];
+
+		// Build up the blocks
+		int numBlocks = (lastOffset / CompressionSettings.BITMAP_BLOCK_SZ) + 1;
+		// To simplify the logic, we make two passes.
+		// The first pass divides the offsets by block.
+		int[] blockLengths = new int[numBlocks];
+
+		for(int ix = 0; ix < len; ix++) {
+			int val = offsets[ix];
+			int blockForVal = val / CompressionSettings.BITMAP_BLOCK_SZ;
+			blockLengths[blockForVal]++;
+		}
+
+		// The second pass creates the blocks.
+		int totalSize = numBlocks;
+		for(int block = 0; block < numBlocks; block++) {
+			totalSize += blockLengths[block];
+		}
+		char[] encodedBlocks = new char[totalSize];
+
+		int inputIx = 0;
+		int blockStartIx = 0;
+		for(int block = 0; block < numBlocks; block++) {
+			int blockSz = blockLengths[block];
+
+			// First entry in the block is number of bits
+			encodedBlocks[blockStartIx] = (char) blockSz;
+
+			for(int i = 0; i < blockSz; i++) {
+				encodedBlocks[blockStartIx + i +
+					1] = (char) (offsets[inputIx + i] % CompressionSettings.BITMAP_BLOCK_SZ);
+			}
+
+			inputIx += blockSz;
+			blockStartIx += blockSz + 1;
+		}
+
+		return encodedBlocks;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java
index 24cb0a4..5cd85a7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java
@@ -26,8 +26,8 @@
 import java.util.HashMap;
 import java.util.Iterator;
 
-import org.apache.sysds.runtime.compress.BitmapEncoder;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
@@ -46,10 +46,6 @@
 
 	protected static final boolean CREATE_SKIP_LIST = true;
 
-	protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-	public static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ;
-	public static boolean ALLOW_CACHE_CONSCIOUS_ROWSUMS = true;
-
 	/** Bitmaps, one per uncompressed value tuple in {@link #_dict}. */
 	protected int[] _ptr; // bitmap offsets per value
 	protected char[] _data; // linearized bitmaps (variable length)
@@ -64,10 +60,10 @@
 	 * @param colIndices indices (within the block) of the columns included in this column
 	 * @param numRows    total number of rows in the parent block
 	 * @param ubm        Uncompressed bitmap representation of the block
+	 * @param cs         The Compression settings used for compression
 	 */
-	public ColGroupOffset(int[] colIndices, int numRows, UncompressedBitmap ubm) {
-		super(colIndices, numRows, ubm);
-		_zeros = (ubm.getNumOffsets() < numRows);
+	public ColGroupOffset(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
+		super(colIndices, numRows, ubm, cs);
 	}
 
 	/**
@@ -104,10 +100,10 @@
 	public long estimateInMemorySize() {
 		// Could use a ternary operator, but it looks odd with our code formatter here.
 		if(_data == null) {
-			return ColGroupSizes.estimateInMemorySizeOffset(getNumCols(), _colIndexes.length, 0, 0);
+			return ColGroupSizes.estimateInMemorySizeOffset(getNumCols(), _colIndexes.length, 0, 0, isLossy());
 		}
 		else {
-			return ColGroupSizes.estimateInMemorySizeOffset(getNumCols(), getValues().length, _ptr.length, _data.length);
+			return ColGroupSizes.estimateInMemorySizeOffset(getNumCols(), getValues().length, _ptr.length, _data.length, isLossy());
 		}
 	}
 
@@ -262,79 +258,45 @@
 
 	@Override
 	public void readFields(DataInput in) throws IOException {
-		_numRows = in.readInt();
-		int numCols = in.readInt();
-		int numVals = in.readInt();
-		_zeros = in.readBoolean();
-
-		// read col indices
-		_colIndexes = new int[numCols];
-		for(int i = 0; i < numCols; i++)
-			_colIndexes[i] = in.readInt();
-
-		// read distinct values
-		double[] values = new double[numVals * numCols];
-		for(int i = 0; i < numVals * numCols; i++)
-			values[i] = in.readDouble();
-		_dict = new Dictionary(values);
+		super.readFields(in);
 		
 		// read bitmaps
-		int totalLen = in.readInt();
-		_ptr = new int[numVals + 1];
-		_data = new char[totalLen];
-		for(int i = 0, off = 0; i < numVals; i++) {
-			int len = in.readInt();
-			_ptr[i] = off;
-			for(int j = 0; j < len; j++)
-				_data[off + j] = in.readChar();
-			off += len;
+		_ptr = new int[in.readInt()];
+		for(int i = 0; i< _ptr.length; i++){
+			_ptr[i] = in.readInt();
 		}
-		_ptr[numVals] = totalLen;
+		int totalLen = in.readInt();
+		_data = new char[totalLen];
+		for(int i = 0; i< totalLen; i++){
+			_data[i] = in.readChar();
+		}
 	}
 
 	@Override
 	public void write(DataOutput out) throws IOException {
-		int numCols = getNumCols();
-		int numVals = getNumValues();
-		out.writeInt(_numRows);
-		out.writeInt(numCols);
-		out.writeInt(numVals);
-		out.writeBoolean(_zeros);
-
-		// write col indices
-		for(int i = 0; i < _colIndexes.length; i++)
-			out.writeInt(_colIndexes[i]);
-
-		// write distinct values
-		double[] values = getValues();
-		for(int i = 0; i < numCols * numVals; i++)
-			out.writeDouble(values[i]);
-
+		super.write(out);
 		// write bitmaps (lens and data, offset later recreated)
-		int totalLen = 0;
-		for(int i = 0; i < numVals; i++)
-			totalLen += len(i);
-		out.writeInt(totalLen);
-		for(int i = 0; i < numVals; i++) {
-			int len = len(i);
-			int off = _ptr[i];
-			out.writeInt(len);
-			for(int j = 0; j < len; j++)
-				out.writeChar(_data[off + j]);
+		out.writeInt(_ptr.length);
+		for(int i = 0; i < _ptr.length; i++){
+			out.writeInt(_ptr[i]);
 		}
+		out.writeInt(_data.length);
+		for(int i = 0; i < _data.length; i++){
+			out.writeChar(_data[i]);
+		}
+
 	}
 
 	@Override
 	public long getExactSizeOnDisk() {
-		long ret = 13; // header
-		// col indices
-		ret += 4 * _colIndexes.length;
-		// distinct values (groups of values)
-		ret += 8 * getValues().length;
+		long ret = super.getExactSizeOnDisk();
 		// actual bitmaps
-		ret += 4; // total length
-		for(int i = 0; i < getNumValues(); i++)
-			ret += 4 + 2 * len(i);
+		ret += 4; // total length // _ptr list
+		ret += 4 * _ptr.length;
+		ret += 4; // _data list
+		ret += 2 * _data.length;
+		// for(int i = 0; i < getNumValues(); i++)
+		// 	ret += 4 + 2 * len(i);
 
 		return ret;
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupQuan.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupQuan.java
index 16638d2..7805921 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupQuan.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupQuan.java
@@ -1,513 +1,513 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
+// /*
+//  * Licensed to the Apache Software Foundation (ASF) under one
+//  * or more contributor license agreements.  See the NOTICE file
+//  * distributed with this work for additional information
+//  * regarding copyright ownership.  The ASF licenses this file
+//  * to you under the Apache License, Version 2.0 (the
+//  * "License"); you may not use this file except in compliance
+//  * with the License.  You may obtain a copy of the License at
+//  *
+//  *   http://www.apache.org/licenses/LICENSE-2.0
+//  *
+//  * Unless required by applicable law or agreed to in writing,
+//  * software distributed under the License is distributed on an
+//  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//  * KIND, either express or implied.  See the License for the
+//  * specific language governing permissions and limitations
+//  * under the License.
+//  */
 
-package org.apache.sysds.runtime.compress.colgroup;
+// package org.apache.sysds.runtime.compress.colgroup;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.DoubleSummaryStatistics;
-import java.util.Iterator;
+// import java.io.DataInput;
+// import java.io.DataOutput;
+// import java.io.IOException;
+// import java.util.Arrays;
+// import java.util.Iterator;
 
-import org.apache.commons.lang.NotImplementedException;
-import org.apache.sysds.runtime.DMLCompressionException;
-import org.apache.sysds.runtime.DMLRuntimeException;
-import org.apache.sysds.runtime.DMLScriptException;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
-import org.apache.sysds.runtime.functionobjects.Builtin;
-import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
-import org.apache.sysds.runtime.functionobjects.KahanPlus;
-import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
-import org.apache.sysds.runtime.functionobjects.Multiply;
-import org.apache.sysds.runtime.functionobjects.ReduceAll;
-import org.apache.sysds.runtime.functionobjects.ReduceCol;
-import org.apache.sysds.runtime.functionobjects.ReduceRow;
-import org.apache.sysds.runtime.matrix.data.IJV;
-import org.apache.sysds.runtime.matrix.data.MatrixBlock;
-import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
-import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+// import org.apache.commons.lang.NotImplementedException;
+// import org.apache.sysds.runtime.DMLCompressionException;
+// import org.apache.sysds.runtime.DMLScriptException;
+// import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
+// import org.apache.sysds.runtime.compress.utils.BitmapLossy;
+// import org.apache.sysds.runtime.functionobjects.Builtin;
+// import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
+// import org.apache.sysds.runtime.functionobjects.KahanPlus;
+// import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
+// import org.apache.sysds.runtime.functionobjects.ReduceAll;
+// import org.apache.sysds.runtime.functionobjects.ReduceCol;
+// import org.apache.sysds.runtime.functionobjects.ReduceRow;
+// import org.apache.sysds.runtime.matrix.data.IJV;
+// import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+// import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
+// import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 
-public class ColGroupQuan extends ColGroup {
+// public class ColGroupQuan extends ColGroup {
 
-	private static final long serialVersionUID = -9157476271360522008L;
+// 	private static final long serialVersionUID = -9157476271360522008L;
 
-	protected double _scale;
-	protected byte[] _values;
+// 	protected QDictionary _values;
 
-	protected ColGroupQuan() {
-		super();
-	}
+// 	protected ColGroupQuan() {
+// 		super();
+// 	}
 
-	protected ColGroupQuan(int[] colIndexes, int numRows, UncompressedBitmap ubm) {
-		super(colIndexes, numRows);
-		_values = new byte[ubm.getNumColumns() * numRows];
+// 	protected ColGroupQuan(int[] colIndexes, int numRows, AbstractBitmap ubm) {
+// 		// throw new NotImplementedException();
+// 		super(colIndexes, numRows);
+// 		byte[] lossyValues = ((BitmapLossy)ubm).getValues();
+// 		byte[] values = new byte[numRows * colIndexes.length];
+// 		for(int i = 0; i < lossyValues.length; i++) {
+// 			int[] runs = ubm.getOffsetsList(i).extractValues();
+// 			byte curV = lossyValues[i];
 
-		double[] valuesFullPrecision = ubm.getValues();
-		DoubleSummaryStatistics stat = Arrays.stream(valuesFullPrecision).summaryStatistics();
-		double max = Math.abs(Math.max(stat.getMax(), Math.abs(stat.getMin())));
-		if(Double.isInfinite(max)){
-			throw new DMLCompressionException("Invalid ColGroupQuan, can't quantize Infinite value.");
-		} else if (max == 0){
-			_scale = 1;
-			LOG.error("ColGroup! column with only 0 values good excuse to make new ColGroup");
-		} else{
-			_scale = max / (double) (Byte.MAX_VALUE);
-		}
-		for (int i = 0; i < valuesFullPrecision.length; i++) {
-			int[] runs = ubm.getOffsetsList(i).extractValues();
-			double curV = valuesFullPrecision[i];
-			double scaledVal = curV / _scale;
-			if(Double.isNaN(scaledVal) || Double.isInfinite(scaledVal)){
-				throw new DMLRuntimeException("Something went wrong in scaling values");
-			}
-			byte scaledValQuan = (byte) (scaledVal);
-			for (int j = 0; j < ubm.getOffsetsList(i).size(); j++) {
-				_values[runs[j]] = scaledValQuan;
-			}
-		}
-	}
+// 			for(int j = 0; j < ubm.getOffsetsList(i).size(); j++) {
+// 				values[runs[j]] = curV;
+// 			}
+// 		}
 
-	@Override
-	public boolean getIfCountsType(){
-		return false;
-	}
+// 		_values = new QDictionary(values, ((BitmapLossy)ubm).getScale());
+// 	}
 
-	private ColGroupQuan(int[] colIndexes, double scale, byte[] values) {
-		super(colIndexes, values.length / colIndexes.length);
-		this._scale = scale;
-		this._values = values;
-	}
+// 	protected ColGroupQuan(int[] colIndexes, int numRows, QDictionary values) {
+// 		super(colIndexes, numRows);
+// 		_values = values;
+// 	}
 
-	@Override
-	public CompressionType getCompType() {
-		return CompressionType.QUAN;
-	}
+// 	@Override
+// 	public boolean getIfCountsType() {
+// 		return false;
+// 	}
 
-	@Override
-	protected ColGroupType getColGroupType() {
-		return ColGroupType.QUAN8S;
-	}
+// 	private ColGroupQuan(int[] colIndexes, QDictionary values) {
+// 		super(colIndexes, values.getValuesLength() / colIndexes.length);
+// 		this._values = values;
+// 	}
 
-	@Override
-	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
-		if (_values == null || _values.length == 0) {
-			return;
-		}
-		for (int row = rl; row < ru; row++) {
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-				int col = _colIndexes[colIx];
-				byte qVal = _values[row * colIx + row];
-				double val = qVal * _scale;
-				target.quickSetValue(row, col, val);
-			}
-		}
-	}
+// 	@Override
+// 	public CompressionType getCompType() {
+// 		return CompressionType.QUAN;
+// 	}
 
-	@Override
-	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
-		if (_values == null || _values.length == 0) {
-			return;
-		}
-		for (int row = 0; row < _numRows; row++) {
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-				int col = _colIndexes[colIx];
-				double val = _values[row * colIx + row] * _scale;
-				target.quickSetValue(row, col, val);
-			}
-		}
-	}
+// 	@Override
+// 	protected ColGroupType getColGroupType() {
+// 		return ColGroupType.QUAN8S;
+// 	}
 
-	@Override
-	public void decompressToBlock(MatrixBlock target, int colpos) {
-		if (_values == null || _values.length == 0)
-			return;
+// 	@Override
+// 	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
+// 		if(_values == null || _values.getValuesLength()   == 0) {
+// 			return;
+// 		}
+// 		// TODO Fix Loop to not multiply
+// 		for(int row = rl; row < ru; row++) {
+// 			for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
+// 				int col = _colIndexes[colIx];
+// 				target.quickSetValue(row, col, _values.getValue(row * colIx + row));
+// 			}
+// 		}
+// 	}
 
-		/**
-		 * target.getDenseBlockValues() because this decompress is used for
-		 * TransposeSelfMatrixMult meaning that the result is allocated directly into
-		 * the result row or col matrix with the same code !
-		 */
-		// double[] c = target.getDenseBlockValues();
+// 	@Override
+// 	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
+// 		if(_values == null || _values.getValuesLength() == 0) {
+// 			return;
+// 		}
+// 		for(int row = 0; row < _numRows; row++) {
+// 			for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
+// 				int col = _colIndexes[colIx];
+// 				target.quickSetValue(row, col, _values.getValue(row * colIx + row));
+// 			}
+// 		}
+// 	}
 
-		// for (int row = 0; row < _numRows; row++) {
-		// c[row] = (double)_values[row * colpos + row] * _scale;
-		// }
-		// target.setNonZeros(_numRows);
+// 	@Override
+// 	public void decompressToBlock(MatrixBlock target, int colpos) {
+// 		if(_values == null || _values.getValuesLength()  == 0)
+// 			return;
 
-		double[] c = target.getDenseBlockValues();
-		int nnz = 0;
+// 		double[] c = target.getDenseBlockValues();
+// 		int nnz = 0;
+// 		// TODO Fix for multi col group
+// 		for(int row = 0; row < _numRows; row++) {
+// 			double val = _values.getValue(row);
+// 			if(val != 0) {
+// 				nnz++;
+// 			}
+// 			c[row] = val;
+// 		}
+// 		target.setNonZeros(nnz);
+// 	}
 
-		for (int row = 0; row < _numRows; row++) {
-			double val = _values[row * colpos + row];
-			if (val != 0) {
-				nnz++;
-			}
-			c[row] = val * _scale;
-		}
-		target.setNonZeros(nnz);
-	}
+// 	@Override
+// 	public void write(DataOutput out) throws IOException {
 
-	@Override
-	public void write(DataOutput out) throws IOException {
+// 		out.writeInt(_numRows);
+// 		out.writeInt(_colIndexes.length);
 
-		out.writeInt(_numRows);
-		out.writeInt(_colIndexes.length);
+// 		for(int i = 0; i < _colIndexes.length; i++)
+// 			out.writeInt(_colIndexes[i]);
 
-		for (int i = 0; i < _colIndexes.length; i++)
-			out.writeInt(_colIndexes[i]);
+// 		for(int i = 0; i < _values.getValuesLength() ; i++)
+// 			out.writeByte(_values.getValueByte(i));
 
-		for (int i = 0; i < _values.length; i++)
-			out.writeByte(_values[i]);
+// 		out.writeDouble(_values.getScale());
+// 	}
 
-		out.writeDouble(_scale);
-	}
+// 	@Override
+// 	public void readFields(DataInput in) throws IOException {
+// 		_numRows = in.readInt();
+// 		int numCols = in.readInt();
 
-	@Override
-	public void readFields(DataInput in) throws IOException {
-		_numRows = in.readInt();
-		int numCols = in.readInt();
+// 		_colIndexes = new int[numCols];
+// 		for(int i = 0; i < _colIndexes.length; i++)
+// 			_colIndexes[i] = in.readInt();
 
-		_colIndexes = new int[numCols];
-		for (int i = 0; i < _colIndexes.length; i++)
-			_colIndexes[i] = in.readInt();
+// 		byte[] values = new byte[_numRows * numCols];
+// 		for(int i = 0; i < values.length; i++)
+// 			values[i] = in.readByte();
 
-		_values = new byte[_numRows * numCols];
-		for (int i = 0; i < _values.length; i++)
-			_values[i] = in.readByte();
+// 		double scale = in.readDouble();
 
-		_scale = in.readDouble();
-	}
+// 		_values = new QDictionary(values, scale);
+// 	}
 
-	@Override
-	public long getExactSizeOnDisk() {
-		long ret = 8; // header
-		ret += 4 * _colIndexes.length;
-		ret += _values.length;
-		return ret;
-	}
+// 	@Override
+// 	public long getExactSizeOnDisk() {
+// 		long ret = 8; // header
+// 		ret += 8; // Object header of QDictionary
+// 		ret += 4 * _colIndexes.length;
+// 		ret += _values.getValuesLength() ;
+// 		ret += 8; // scale value
+// 		return ret;
+// 	}
 
-	@Override
-	public double get(int r, int c) {
-		int colIx = Arrays.binarySearch(_colIndexes, c);
-		return _values[r * colIx + r] * _scale;
-	}
+// 	@Override
+// 	public double get(int r, int c) {
+// 		int colIx = Arrays.binarySearch(_colIndexes, c);
+// 		return _values.getValue(r * colIx + r);
+// 	}
 
-	@Override
-	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) {
-		double[] b = ColGroupConverter.getDenseVector(vector);
-		double[] c = result.getDenseBlockValues();
+// 	@Override
+// 	public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) {
 
-		// prepare reduced rhs w/ relevant values
-		double[] sb = new double[_colIndexes.length];
-		for (int j = 0; j < _colIndexes.length; j++) {
-			sb[j] = b[_colIndexes[j]];
-		}
+// 		double[] b = ColGroupConverter.getDenseVector(vector);
+// 		double[] c = result.getDenseBlockValues();
 
-		for (int row = rl; row < ru; row++) {
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-				c[row] += (_values[row * colIx + row] * _scale) * sb[colIx];
-			}
-		}
-	}
+// 		if(_colIndexes.length == 1) {
+// 			double r = b[_colIndexes[0]] * _values.getScale();
+// 			for(int row = rl; row < ru; row++) {
+// 				c[row] += _values.getValueByte(row) * r;
+// 			}
+// 		}
+// 		else {
 
-	@Override
-	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) {
-		double[] a = ColGroupConverter.getDenseVector(vector);
-		double[] c = result.getDenseBlockValues();
+// 			// prepare reduced rhs w/ relevant values
+// 			double[] sb = new double[_colIndexes.length];
+// 			for(int j = 0; j < _colIndexes.length; j++) {
+// 				sb[j] = b[_colIndexes[j]];
+// 			}
 
-		for (int row = 0; row < _numRows; row++) {
-			double val = _values[row] * _scale;
-			for (int col = 0; col < _colIndexes.length; col++) {
-				double value = val * a[row * col + row];
-				c[_colIndexes[col]] += value;
-			}
-		}
+// 			int colIx = 0;
+// 			for(int off = 0; off < _values.getValuesLength() ; off += _numRows) {
+// 				double r = _values.getScale() * sb[colIx];
+// 				for(int row = rl; row < ru; row++) {
+// 					c[row] += _values.getValueByte(off + row) * r;
+// 				}
+// 				colIx++;
+// 			}
+// 		}
+// 	}
+
+// 	@Override
+// 	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) {
+// 		double[] a = ColGroupConverter.getDenseVector(vector);
+// 		double[] c = result.getDenseBlockValues();
+
+// 		for(int row = 0; row < _numRows; row++) {
+// 			double val = _values.getValue(row);
+// 			for(int col = 0; col < _colIndexes.length; col++) {
+// 				double value = val * a[row * col + row];
+// 				c[_colIndexes[col]] += value;
+// 			}
+// 		}
+
+// 	}
+
+// 	@Override
+// 	public void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result) {
+// 		throw new NotImplementedException();
+// 	}
+
+// 	@Override
+// 	public ColGroup scalarOperation(ScalarOperator op) {
+// 		QDictionary res = _values.apply(op);
+// 		return new ColGroupQuan(_colIndexes, res);
+// 	}
+
+// 	@Override
+// 	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) {
+// 		unaryAggregateOperations(op, result, 0, getNumRows());
+// 	}
+
+// 	@Override
+// 	public long estimateInMemorySize() {
+// 		return ColGroupSizes.estimateInMemorySizeQuan(getNumRows(), getNumCols());
+// 	}
+
+// 	@Override
+// 	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) {
+
+// 		if(op.aggOp.increOp.fn instanceof KahanPlus) {
+
+// 			// Not using KahnObject because we already lost some of that precision anyway in
+// 			// quantization.
+// 			if(op.indexFn instanceof ReduceAll)
+// 				computeSum(result);
+// 			else if(op.indexFn instanceof ReduceCol)
+// 				computeRowSums(result, rl, ru);
+// 			else if(op.indexFn instanceof ReduceRow)
+// 				computeColSums(result);
+// 		}
+// 		else if(op.aggOp.increOp.fn instanceof KahanPlusSq) {
+// 			if(op.indexFn instanceof ReduceAll)
+// 				computeSumSq(result);
+// 			else if(op.indexFn instanceof ReduceCol)
+// 				computeRowSumsSq(result, rl, ru);
+// 			else if(op.indexFn instanceof ReduceRow)
+// 				computeColSumsSq(result);
+// 		}
+// 		else if(op.aggOp.increOp.fn instanceof Builtin &&
+// 			(((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX ||
+// 				((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN)) {
+// 			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
+// 			// min and max (reduceall/reducerow over tuples only)
 
-	}
+// 			if(op.indexFn instanceof ReduceAll)
+// 				computeMxx(result, builtin, _zeros);
+// 			else if(op.indexFn instanceof ReduceCol)
+// 				computeRowMxx(result, builtin, rl, ru);
+// 			else if(op.indexFn instanceof ReduceRow)
+// 				computeColMxx(result, builtin, _zeros);
+// 		}
+// 		else {
+// 			throw new DMLScriptException("Unknown UnaryAggregate operator on CompressedMatrixBlock");
+// 		}
+// 	}
 
-	@Override
-	public void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result) {
-		throw new NotImplementedException();
-	}
+// 	protected void computeSum(MatrixBlock result) {
+// 		long sum = 0L;
+// 		for(int i = 0; i < _values.length(); i++) {
+// 			sum += _values.getValueByte(i);
+// 		}
+// 		result.quickSetValue(0, 0, result.getValue(0, 0) + (double) sum * _values.getScale());
+// 	}
 
-	@Override
-	public ColGroup scalarOperation(ScalarOperator op) {
-		if (op.fn instanceof Multiply) {
-			return new ColGroupQuan(_colIndexes, op.executeScalar(_scale), _values);
-		}
-		double[] temp = new double[_values.length];
-		double max = op.executeScalar((double)_values[0] * _scale);
-		temp[0] = max;
-		for (int i = 1; i < _values.length; i++) {
-			temp[i] = op.executeScalar((double)_values[i] * _scale);
-			double absTemp = Math.abs(temp[i]);
-			if (absTemp > max) {
-				max = absTemp;
-			}
-		}
-		byte[] newValues = new byte[_values.length];
-		double newScale = max / (double) (Byte.MAX_VALUE);
-		for (int i = 0; i < _values.length; i++) {
-			newValues[i] = (byte) ((double)temp[i] / newScale);
-		}
+// 	protected void computeSumSq(MatrixBlock result) {
 
-		return new ColGroupQuan(_colIndexes, newScale, newValues);
-	}
+// 		double sumsq = 0;
+// 		for(int i = 0; i < _values.length(); i++) {
+// 			double v = _values.getValue(i);
+// 			sumsq += v * v;
+// 		}
+// 		result.quickSetValue(0, 0, result.getValue(0, 0) + sumsq);
+// 	}
 
-	@Override
-	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) {
-		unaryAggregateOperations(op, result, 0, getNumRows());
-	}
+// 	protected void computeRowSums(MatrixBlock result, int rl, int ru) {
+// 		if(_colIndexes.length < 256) {
+// 			short[] rowSums = new short[ru - rl];
+// 			for(int row = rl; row < ru; row++) {
+// 				for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
+// 					rowSums[row - rl] += _values.getValueByte(row * colIx + row);
+// 				}
+// 			}
+// 			for(int row = rl; row < ru; row++) {
+// 				result.quickSetValue(row, 0, result.getValue(row, 0) + rowSums[row - rl] * _values.getScale());
+// 			}
+// 		}
+// 		else {
+// 			throw new NotImplementedException("Not Implemented number of columns in ColGroupQuan row sum");
+// 		}
+// 	}
 
-	@Override
-	public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result, int rl, int ru) {
+// 	protected void computeRowSumsSq(MatrixBlock result, int rl, int ru) {
+// 		// TODO FIX Loop Index calculation!
+// 		if(_colIndexes.length < 256) {
+// 			float[] rowSumSq = new float[ru - rl];
+// 			for(int row = rl; row < ru; row++) {
+// 				for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
+// 					double v = _values.getValue(row * colIx + row);
+// 					rowSumSq[row - rl] += v * v;
+// 				}
+// 			}
 
-		if (op.aggOp.increOp.fn instanceof KahanPlus) {
+// 			for(int row = rl; row < ru; row++) {
+// 				result.quickSetValue(row, 0, result.getValue(row, 0) + rowSumSq[row - rl]);
+// 			}
 
-			// Not using KahnObject because we already lost some of that precision anyway in
-			// quantization.
-			if (op.indexFn instanceof ReduceAll)
-				computeSum(result);
-			else if (op.indexFn instanceof ReduceCol)
-				computeRowSums(result, rl, ru);
-			else if (op.indexFn instanceof ReduceRow)
-				computeColSums(result);
-		} else if (op.aggOp.increOp.fn instanceof KahanPlusSq) {
-			if (op.indexFn instanceof ReduceAll)
-				computeSumSq(result);
-			else if (op.indexFn instanceof ReduceCol)
-				computeRowSumsSq(result, rl, ru);
-			else if (op.indexFn instanceof ReduceRow)
-				computeColSumsSq(result);
-		} else if (op.aggOp.increOp.fn instanceof Builtin
-				&& (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX
-						|| ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN)) {
-			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
-			// min and max (reduceall/reducerow over tuples only)
+// 		}
+// 		else {
+// 			throw new NotImplementedException("Not Implemented number of columns in ColGroupQuan row sum");
+// 		}
+// 	}
 
-			if (op.indexFn instanceof ReduceAll)
-				computeMxx(result, builtin, _zeros);
-			else if (op.indexFn instanceof ReduceCol)
-				computeRowMxx(result, builtin, rl, ru);
-			else if (op.indexFn instanceof ReduceRow)
-				computeColMxx(result, builtin, _zeros);
-		} else {
-			throw new DMLScriptException("Unknown UnaryAggregate operator on CompressedMatrixBlock");
-		}
-	}
+// 	protected void computeColSums(MatrixBlock result) {
+// 		// TODO AVOID division
+// 		if(_numRows < 256) {
+// 			short[] colSums = new short[_colIndexes.length];
+// 			for(int i = 0; i < _values.length(); i++) {
+// 				colSums[i / _numRows] += _values.getValueByte(i);
+// 			}
 
-	protected void computeSum(MatrixBlock result) {
-		// TODO Potential speedup use vector instructions/group in batches of 32
-		long sum = 0L;
-		for (int i = 0; i < _values.length; i++) {
-			sum += (long) _values[i];
-		}
-		result.quickSetValue(0, 0, result.getValue(0, 0) + (double) sum * _scale);
-	}
+// 			for(int col = 0; col < _colIndexes.length; col++) {
+// 				result.quickSetValue(0, _colIndexes[col], colSums[col] * _values.getScale());
+// 			}
+// 		}
+// 		else if(_numRows < 16777216) { // (Int max + 1) / (short max + 1)
+// 			int[] colSums = new int[_colIndexes.length];
+// 			for(int i = 0; i < _values.length(); i++) {
+// 				colSums[i / _numRows] += _values.getValueByte(i);
+// 			}
 
-	protected void computeSumSq(MatrixBlock result) {
+// 			for(int col = 0; col < _colIndexes.length; col++) {
+// 				result.quickSetValue(0, _colIndexes[col], colSums[col] * _values.getScale());
+// 			}
+// 		}
+// 		else {
+// 			double[] colSums = new double[_colIndexes.length];
+// 			for(int i = 0; i < _values.length(); i++) {
+// 				colSums[i / _numRows] += _values.getValueByte(i);
+// 			}
 
-		double sumsq = 0;
-		for (int i = 0; i < _values.length; i++) {
-			double v =  _values[i] * _scale;
-			sumsq += v*v;
-		}
-		result.quickSetValue(0, 0, result.getValue(0, 0) + sumsq);
-	}
+// 			for(int col = 0; col < _colIndexes.length; col++) {
+// 				result.quickSetValue(0, _colIndexes[col], colSums[col] * _values.getScale());
+// 			}
+// 		}
+// 	}
 
-	protected void computeRowSums(MatrixBlock result, int rl, int ru) {
-		if (_colIndexes.length < 256) {
-			short[] rowSums = new short[ru - rl];
-			for (int row = rl; row < ru; row++) {
-				for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-					rowSums[row - rl] += _values[row * colIx + row];
-				}
-			}
-			for (int row = rl; row < ru; row++) {
-				result.quickSetValue(row, 0, result.getValue(row, 0) + (double) rowSums[row - rl] * _scale);
-			}
-		} else {
-			throw new NotImplementedException("Not Implemented number of columns in ColGroupQuan row sum");
-		}
-	}
+// 	protected void computeColSumsSq(MatrixBlock result) {
 
-	protected void computeRowSumsSq(MatrixBlock result, int rl, int ru) {
-		if (_colIndexes.length < 256) {
-			float[] rowSumSq = new float[ru - rl];
-			for (int row = rl; row < ru; row++) {
-				for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-					double v = (double) _values[row * colIx + row] * _scale;
-					rowSumSq[row - rl] += v*v;
-				}
-			}
+// 		// TODO Avoid Division!
+// 		double[] sumsq = new double[_colIndexes.length];
+// 		for(int i = 0; i < _values.length(); i++) {
+// 			double v = _values.getValue(i);
+// 			sumsq[i / _numRows] += v * v;
+// 		}
 
-			for (int row = rl; row < ru; row++) {
-				result.quickSetValue(row, 0, result.getValue(row, 0) + rowSumSq[row - rl]);
-			}
+// 		for(int col = 0; col < _colIndexes.length; col++) {
+// 			result.quickSetValue(0, _colIndexes[col], sumsq[col]);
+// 		}
 
-		} else {
-			throw new NotImplementedException("Not Implemented number of columns in ColGroupQuan row sum");
-		}
-	}
+// 	}
 
-	protected void computeColSums(MatrixBlock result) {
-		if (_numRows < 256) {
-			short[] colSums = new short[_colIndexes.length];
-			for (int i = 0; i < _values.length; i++) {
-				colSums[i / _numRows] += _values[i];
-			}
+// 	protected void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
+// 		double[] c = result.getDenseBlockValues();
+// 		// TODO: Fix Loop!
+// 		for(int row = rl; row < ru; row++) {
+// 			for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
 
-			for (int col = 0; col < _colIndexes.length; col++) {
-				result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
-			}
-		} else if (_numRows < 16777216) { // (Int max + 1) / (short max + 1)
-			int[] colSums = new int[_colIndexes.length];
-			for (int i = 0; i < _values.length; i++) {
-				colSums[i / _numRows] += _values[i];
-			}
+// 				double v = _values.getValue(row * colIx + row);
+// 				// System.out.println(v);
+// 				c[row] = builtin.execute(c[row], v);
+// 			}
+// 		}
 
-			for (int col = 0; col < _colIndexes.length; col++) {
-				result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
-			}
-		} else {
-			double[] colSums = new double[_colIndexes.length];
-			for (int i = 0; i < _values.length; i++) {
-				colSums[i / _numRows] += _values[i];
-			}
+// 	}
 
-			for (int col = 0; col < _colIndexes.length; col++) {
-				result.quickSetValue(0, _colIndexes[col], colSums[col] * _scale);
-			}
-		}
-	}
+// 	protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
 
-	protected void computeColSumsSq(MatrixBlock result) {
-	
-		double[] sumsq = new double[_colIndexes.length];
-		for (int i = 0; i < _values.length; i++) {
-			double v =  _values[i] * _scale;
-			sumsq[i / _numRows] += v*v;
-		}
-		
-		for (int col = 0; col < _colIndexes.length; col++) {
-			result.quickSetValue(0, _colIndexes[col], sumsq[col]);
-		}
-		
-	}
+// 		double res = 0;
+// 		for(int i = 0; i < _values.length(); i++) {
+// 			res = builtin.execute(res, _values.getValue(i));
+// 		}
+// 		result.quickSetValue(0, 0, res);
+// 	}
 
-	protected void computeRowMxx(MatrixBlock result, Builtin builtin, int rl, int ru) {
-		double[] c = result.getDenseBlockValues();
-		for (int row = rl; row < ru; row++) {
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-				double v = ((double)_values[row * colIx + row]) * _scale;
-				// System.out.println(v);
-				c[row] = builtin.execute(c[row], v);
-			}
-		}
-		
-	}
+// 	protected void computeColMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
+// 		double[] colRes = new double[_colIndexes.length];
+// 		// TODO FIX INDEX CALCULATION / loop
+// 		for(int i = 0; i < _values.length(); i++) {
+// 			colRes[i / _numRows] = builtin.execute(colRes[i / _numRows], _values.getValue(i));
+// 		}
 
-	protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
+// 		for(int col = 0; col < _colIndexes.length; col++) {
+// 			result.quickSetValue(0, _colIndexes[col], colRes[col]);
+// 		}
+// 	}
 
-		double res = 0;
-		for (int i = 0; i < _values.length; i++) {
-			res = builtin.execute(res, _values[i] * _scale);
-		}
-		result.quickSetValue(0, 0, res);
-	}
+// 	@Override
+// 	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
+// 		return new QuanValueIterator();
+// 	}
 
-	protected void computeColMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
-		double[] colRes = new double[_colIndexes.length];
-		for (int i = 0; i < _values.length; i++) {
-			colRes[i / _numRows] = builtin.execute(colRes[i / _numRows], _values[i] * _scale);
-		}
+// 	private class QuanValueIterator implements Iterator<IJV> {
 
-		for (int col = 0; col < _colIndexes.length; col++) {
-			result.quickSetValue(0, _colIndexes[col], colRes[col]);
-		}
-	}
+// 		@Override
+// 		public boolean hasNext() {
+// 			throw new NotImplementedException("Not Implemented");
+// 		}
 
-	@Override
-	public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros, boolean rowMajor) {
-		return new QuanValueIterator();
-	}
+// 		@Override
+// 		public IJV next() {
+// 			throw new NotImplementedException("Not Implemented");
+// 		}
 
-	private class QuanValueIterator implements Iterator<IJV> {
+// 	}
 
-		@Override
-		public boolean hasNext() {
-			throw new NotImplementedException("Not Implemented");
-		}
+// 	@Override
+// 	public ColGroupRowIterator getRowIterator(int rl, int ru) {
 
-		@Override
-		public IJV next() {
-			throw new NotImplementedException("Not Implemented");
-		}
+// 		return new QuanRowIterator();
+// 	}
 
-	}
+// 	private class QuanRowIterator extends ColGroupRowIterator {
 
-	@Override
-	public ColGroupRowIterator getRowIterator(int rl, int ru) {
+// 		@Override
+// 		public void next(double[] buff, int rowIx, int segIx, boolean last) {
+// 			throw new NotImplementedException("Not Implemented");
+// 		}
 
-		return new QuanRowIterator();
-	}
+// 	}
 
-	private class QuanRowIterator extends ColGroupRowIterator {
+// 	@Override
+// 	public void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
 
-		@Override
-		public void next(double[] buff, int rowIx, int segIx, boolean last) {
-			throw new NotImplementedException("Not Implemented");
-		}
+// 		for(int row = rl; row < ru; row++) {
+// 			int lnnz = 0;
+// 			for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
+// 				lnnz += (_values.getValue(row * colIx + row) != 0) ? 1 : 0;
+// 			}
+// 			rnnz[row - rl] += lnnz;
+// 		}
+// 	}
 
-	}
+// 	@Override
+// 	public MatrixBlock getValuesAsBlock() {
+// 		MatrixBlock target = new MatrixBlock(_numRows, _colIndexes.length, 0.0);
+// 		decompressToBlock(target, _colIndexes);
+// 		return target;
+// 	}
 
-	@Override
-	public void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
-		// TODO Auto-generated method stub
-		for (int row = rl; row < ru; row++) {
-			int lnnz = 0;
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
-				lnnz += (_values[row * colIx + row] != 0) ? 1 : 0;
-			}
-			rnnz[row - rl] += lnnz;
-		}
-	}
+// 	@Override
+// 	public int[] getCounts() {
+// 		throw new DMLCompressionException(
+// 			"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
+// 	}
 
-	@Override
-	public MatrixBlock getValuesAsBlock() {
-		// TODO Auto-generated method stub
-		MatrixBlock target = new MatrixBlock(_numRows, _colIndexes.length, 0.0);
-		decompressToBlock(target, _colIndexes);
-		return target;
-	}
+// 	@Override
+// 	public int[] getCounts(boolean includeZero) {
+// 		throw new DMLCompressionException(
+// 			"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
+// 	}
 
-	@Override
-	public int[] getCounts() {
-		throw new DMLCompressionException(
-				"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
-	}
+// 	@Override
+// 	public double[] getValues() {
+// 		return _values.getValues();
+// 	}
 
-	@Override
-	public int[] getCounts(boolean includeZero) {
-		throw new DMLCompressionException(
-				"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
-	}
+// 	@Override
+// 	public boolean isLossy() {
+// 		return true;
+// 	}
 
-}
\ No newline at end of file
+// }
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
index 7aa8b53..802dee4 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
@@ -19,18 +19,21 @@
 
 package org.apache.sysds.runtime.compress.colgroup;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.compress.BitmapEncoder;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils;
 import org.apache.sysds.runtime.data.DenseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.KahanFunction;
 import org.apache.sysds.runtime.functionobjects.KahanPlus;
+import org.apache.sysds.runtime.functionobjects.KahanPlusSq;
 import org.apache.sysds.runtime.instructions.cp.KahanObject;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.data.Pair;
@@ -52,16 +55,17 @@
 	 * @param colIndices indices (within the block) of the columns included in this column
 	 * @param numRows    total number of rows in the parent block
 	 * @param ubm        Uncompressed bitmap representation of the block
+	 * @param cs         The Compression settings used for compression
 	 */
-	protected ColGroupRLE(int[] colIndices, int numRows, UncompressedBitmap ubm) {
-		super(colIndices, numRows, ubm);
+	protected ColGroupRLE(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
+		super(colIndices, numRows, ubm, cs);
 
 		// compress the bitmaps
 		final int numVals = ubm.getNumValues();
 		char[][] lbitmaps = new char[numVals][];
 		int totalLen = 0;
 		for(int k = 0; k < numVals; k++) {
-			lbitmaps[k] = BitmapEncoder.genRLEBitmap(ubm.getOffsetsList(k).extractValues(), ubm.getNumOffsets(k));
+			lbitmaps[k] = genRLEBitmap(ubm.getOffsetsList(k).extractValues(), ubm.getNumOffsets(k));
 			totalLen += lbitmaps[k].length;
 		}
 
@@ -71,8 +75,8 @@
 		// debug output
 		double ucSize = ColGroupSizes.estimateInMemorySizeUncompressed(numRows, colIndices.length, 1.0);
 		if(estimateInMemorySize() > ucSize)
-			LOG.warn(
-				String.format("RLE group larger than UC dense: %8d Uncompressed: %8d", estimateInMemorySize(), (int)ucSize));
+			LOG.warn(String
+				.format("RLE group larger than UC dense: %8d Uncompressed: %8d", estimateInMemorySize(), (int) ucSize));
 	}
 
 	protected ColGroupRLE(int[] colIndices, int numRows, boolean zeros, double[] values, char[] bitmaps,
@@ -273,11 +277,11 @@
 			sb[j] = b[_colIndexes[j]];
 		}
 
-		if(numVals > 1 && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ) {
+		if(numVals > 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
 			// L3 cache alignment, see comment rightMultByVector OLE column group
 			// core difference of RLE to OLE is that runs are not segment alignment,
 			// which requires care of handling runs crossing cache-buckets
-			final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ;
+			final int blksz = CompressionSettings.BITMAP_BLOCK_SZ * 2;
 
 			// step 1: prepare position and value arrays
 
@@ -363,8 +367,8 @@
 		final int n = getNumRows();
 		final double[] values = getValues();
 
-		if(numVals > 1 && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ) {
-			final int blksz = ColGroupOffset.READ_CACHE_BLKSZ;
+		if(numVals > 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
+			final int blksz = 2 * CompressionSettings.BITMAP_BLOCK_SZ;
 
 			// step 1: prepare position and value arrays
 
@@ -426,34 +430,36 @@
 		}
 	}
 
-	@Override
-	public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
-		// note: this method is only applicable for numrows < blocksize
-		double[] c = result.getDenseBlockValues();
-		final int numCols = getNumCols();
-		final int numVals = getNumValues();
-		final double[] values = getValues();
+	// @Override
+	// public void leftMultByRowVector(ColGroupDDC a, MatrixBlock result) {
+	// 	// note: this method is only applicable for numrows < blocksize
+	// 	double[] c = result.getDenseBlockValues();
+	// 	final int numCols = getNumCols();
+	// 	final int numVals = getNumValues();
+	// 	final double[] values = getValues();
+	// 	final double[] aValues = a.getValues();
 
-		// iterate over all values and their bitmaps
-		for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) {
-			int boff = _ptr[k];
-			int blen = len(k);
+	// 	// iterate over all values and their bitmaps
+	// 	for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) {
+	// 		int boff = _ptr[k];
+	// 		int blen = len(k);
 
-			double vsum = 0;
-			int curRunEnd = 0;
-			for(int bix = 0; bix < blen; bix += 2) {
-				int curRunStartOff = curRunEnd + _data[boff + bix];
-				int curRunLen = _data[boff + bix + 1];
-				for(int i = curRunStartOff; i < curRunStartOff + curRunLen; i++)
-					vsum += a.getData(i, 0);
-				curRunEnd = curRunStartOff + curRunLen;
-			}
+	// 		double vsum = 0;
+	// 		int curRunEnd = 0;
+	// 		for(int bix = 0; bix < blen; bix += 2) {
+	// 			int curRunStartOff = curRunEnd + _data[boff + bix];
+	// 			int curRunLen = _data[boff + bix + 1];
+	// 			for(int i = curRunStartOff; i < curRunStartOff + curRunLen; i++) {
+	// 				vsum += aValues[a.getIndex(_data[i])];
+	// 			}
+	// 			curRunEnd = curRunStartOff + curRunLen;
+	// 		}
 
-			// scale partial results by values and write results
-			for(int j = 0; j < numCols; j++)
-				c[_colIndexes[j]] += vsum * values[valOff + j];
-		}
-	}
+	// 		// scale partial results by values and write results
+	// 		for(int j = 0; j < numCols; j++)
+	// 			c[_colIndexes[j]] += vsum * values[valOff + j];
+	// 	}
+	// }
 
 	@Override
 	public ColGroup scalarOperation(ScalarOperator op) {
@@ -474,7 +480,7 @@
 		}
 
 		double[] rvalues = applyScalarOp(op, val0, getNumCols());
-		char[] lbitmap = BitmapEncoder.genRLEBitmap(loff, loff.length);
+		char[] lbitmap = genRLEBitmap(loff, loff.length);
 		char[] rbitmaps = Arrays.copyOf(_data, _data.length + lbitmap.length);
 		System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length);
 		int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length + 1);
@@ -485,31 +491,52 @@
 
 	@Override
 	protected final void computeSum(MatrixBlock result, KahanFunction kplus) {
-		KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
 
 		final int numCols = getNumCols();
 		final int numVals = getNumValues();
-		final double[] values = getValues();
 
-		for(int k = 0; k < numVals; k++) {
-			int boff = _ptr[k];
-			int blen = len(k);
-			int valOff = k * numCols;
-			int curRunEnd = 0;
-			int count = 0;
-			for(int bix = 0; bix < blen; bix += 2) {
-				int curRunStartOff = curRunEnd + _data[boff + bix];
-				curRunEnd = curRunStartOff + _data[boff + bix + 1];
-				count += curRunEnd - curRunStartOff;
+		if(_dict instanceof QDictionary && !(kplus instanceof KahanPlusSq)) {
+			final QDictionary values = ((QDictionary) _dict);
+			long sum = 0;
+			for(int k = 0; k < numVals; k++) {
+				int count = getCountValue(k);
+				int valOff = k * _colIndexes.length;
+				// scale counts by all values
+				for(int j = 0; j < numCols; j++)
+					sum += values.getValueByte(valOff + j) * count;
+			}
+			result.quickSetValue(0, 0, result.quickGetValue(0, 0) + sum * values._scale);
+			result.quickSetValue(0, 1, 0);
+		}
+		else {
+			KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));
+
+			final double[] values = getValues();
+			for(int k = 0; k < numVals; k++) {
+				int count = getCountValue(k);
+				int valOff = k * _colIndexes.length;
+				// scale counts by all values
+				for(int j = 0; j < numCols; j++)
+					kplus.execute3(kbuff, values[valOff + j], count);
 			}
 
-			// scale counts by all values
-			for(int j = 0; j < numCols; j++)
-				kplus.execute3(kbuff, values[valOff + j], count);
+			result.quickSetValue(0, 0, kbuff._sum);
+			result.quickSetValue(0, 1, kbuff._correction);
 		}
 
-		result.quickSetValue(0, 0, kbuff._sum);
-		result.quickSetValue(0, 1, kbuff._correction);
+	}
+
+	private int getCountValue(int k) {
+		int boff = _ptr[k];
+		int blen = len(k);
+		int curRunEnd = 0;
+		int count = 0;
+		for(int bix = 0; bix < blen; bix += 2) {
+			int curRunStartOff = curRunEnd + _data[boff + bix];
+			curRunEnd = curRunStartOff + _data[boff + bix + 1];
+			count += curRunEnd - curRunStartOff;
+		}
+		return count;
 	}
 
 	@Override
@@ -521,15 +548,15 @@
 
 		final int numVals = getNumValues();
 
-		if(ALLOW_CACHE_CONSCIOUS_ROWSUMS && numVals > 1 && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ) {
-			final int blksz = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
+		if( numVals > 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
+			final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 
 			// step 1: prepare position and value arrays
 
 			// current pos / values per RLE list
 			int[] astart = new int[numVals];
 			int[] apos = skipScan(numVals, rl, astart);
-			double[] aval = sumAllValues(kplus, kbuff, false);
+			double[] aval = _dict.sumAllRowsToDouble(kplus, kbuff, _colIndexes.length,false);
 
 			// step 2: cache conscious matrix-vector via horizontal scans
 			for(int bi = rl; bi < ru; bi += blksz) {
@@ -572,7 +599,7 @@
 			for(int k = 0; k < numVals; k++) {
 				int boff = _ptr[k];
 				int blen = len(k);
-				double val = sumValues(k, kplus, kbuff);
+				double val = _dict.sumRow(k, kplus, kbuff, _colIndexes.length);
 
 				if(val != 0.0) {
 					Pair<Integer, Integer> tmp = skipScanVal(k, rl);
@@ -835,7 +862,7 @@
 		public RLERowIterator(int rl, int ru) {
 			_astart = new int[getNumValues()];
 			_apos = skipScan(getNumValues(), rl, _astart);
-			_vcodes = new int[Math.min(BitmapEncoder.BITMAP_BLOCK_SZ, ru - rl)];
+			_vcodes = new int[Math.min(CompressionSettings.BITMAP_BLOCK_SZ, ru - rl)];
 			Arrays.fill(_vcodes, -1); // initial reset
 			getNextSegment(rl);
 		}
@@ -852,7 +879,7 @@
 				// reset vcode to avoid scan on next segment
 				_vcodes[segIx] = -1;
 			}
-			if(segIx + 1 == BitmapEncoder.BITMAP_BLOCK_SZ && !last)
+			if(segIx + 1 == CompressionSettings.BITMAP_BLOCK_SZ && !last)
 				getNextSegment(rowIx + 1);
 		}
 
@@ -860,7 +887,7 @@
 			// materialize value codes for entire segment in a
 			// single pass over all values (store value code by pos)
 			final int numVals = getNumValues();
-			final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+			final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 			for(int k = 0; k < numVals; k++) {
 				int boff = _ptr[k];
 				int blen = len(k);
@@ -885,4 +912,101 @@
 			}
 		}
 	}
+
+	/**
+	 * Encodes the bitmap as a series of run lengths and offsets.
+	 * 
+	 * Note that this method should not be called if the len is 0.
+	 * 
+	 * @param offsets uncompressed offset list
+	 * @param len     logical length of the given offset list
+	 * @return compressed version of said bitmap
+	 */
+	public static char[] genRLEBitmap(int[] offsets, int len) {
+
+		// Use an ArrayList for correctness at the expense of temp space
+		List<Character> buf = new ArrayList<>();
+
+		// 1 + (position of last 1 in the previous run of 1's)
+		// We add 1 because runs may be of length zero.
+		int lastRunEnd = 0;
+
+		// Offset between the end of the previous run of 1's and the first 1 in
+		// the current run. Initialized below.
+		int curRunOff;
+
+		// Length of the most recent run of 1's
+		int curRunLen = 0;
+
+		// Current encoding is as follows:
+		// Negative entry: abs(Entry) encodes the offset to the next lone 1 bit.
+		// Positive entry: Entry encodes offset to next run of 1's. The next
+		// entry in the bitmap holds a run length.
+
+		// Special-case the first run to simplify the loop below.
+		int firstOff = offsets[0];
+
+		// The first run may start more than a short's worth of bits in
+		while(firstOff > Character.MAX_VALUE) {
+			buf.add(Character.MAX_VALUE);
+			buf.add((char) 0);
+			firstOff -= Character.MAX_VALUE;
+			lastRunEnd += Character.MAX_VALUE;
+		}
+
+		// Create the first run with an initial size of 1
+		curRunOff = firstOff;
+		curRunLen = 1;
+
+		// Process the remaining offsets
+		for(int i = 1; i < len; i++) {
+
+			int absOffset = offsets[i];
+
+			// 1 + (last position in run)
+			int curRunEnd = lastRunEnd + curRunOff + curRunLen;
+
+			if(absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) {
+				// End of a run, either because we hit a run of 0's or because the
+				// number of 1's won't fit in 16 bits. Add run to bitmap and start a new one.
+				buf.add((char) curRunOff);
+				buf.add((char) curRunLen);
+
+				lastRunEnd = curRunEnd;
+				curRunOff = absOffset - lastRunEnd;
+
+				while(curRunOff > Character.MAX_VALUE) {
+					// SPECIAL CASE: Offset to next run doesn't fit into 16 bits.
+					// Add zero-length runs until the offset is small enough.
+					buf.add(Character.MAX_VALUE);
+					buf.add((char) 0);
+					lastRunEnd += Character.MAX_VALUE;
+					curRunOff -= Character.MAX_VALUE;
+				}
+
+				curRunLen = 1;
+			}
+			else {
+				// Middle of a run
+				curRunLen++;
+			}
+		}
+
+		// Edge case, if the last run overlaps the character length bound.
+		if(curRunOff + curRunLen > Character.MAX_VALUE) {
+			buf.add(Character.MAX_VALUE);
+			buf.add((char) 0);
+			curRunOff -= Character.MAX_VALUE;
+		}
+
+		// Add the final Run.
+		buf.add((char) curRunOff);
+		buf.add((char) curRunLen);
+
+		// Convert wasteful ArrayList to packed array.
+		char[] ret = new char[buf.size()];
+		for(int i = 0; i < buf.size(); i++)
+			ret[i] = buf.get(i);
+		return ret;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java
index 5b39ed2..5b9c9cc 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java
@@ -19,41 +19,16 @@
 
 package org.apache.sysds.runtime.compress.colgroup;
 
-import org.apache.commons.lang.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.compress.BitmapEncoder;
+import org.apache.sysds.runtime.DMLCompressionException;
+import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.utils.MemoryEstimates;
 
 public class ColGroupSizes {
 	protected static final Log LOG = LogFactory.getLog(ColGroupSizes.class.getName());
 
-	public static long getEmptyMemoryFootprint(Class<?> colGroupClass) {
-		switch(colGroupClass.getSimpleName()) {
-			case "ColGroup":
-				return estimateInMemorySizeGroup(0);
-			case "ColGroupValue":
-				return estimateInMemorySizeGroupValue(0, 0);
-			case "ColGroupOffset":
-				return estimateInMemorySizeOffset(0, 0, 0, 0);
-			case "ColGroupDDC":
-				return estimateInMemorySizeDDC(0, 0);
-			case "ColGroupDDC1":
-				return estimateInMemorySizeDDC1(0, 0, 0);
-			case "ColGroupDDC2":
-				return estimateInMemorySizeDDC2(0, 0, 0);
-			case "ColGroupOLE":
-				return estimateInMemorySizeOLE(0, 0, 0, 0);
-			case "ColGroupRLE":
-				return estimateInMemorySizeRLE(0, 0, 0, 0);
-			case "ColGroupUncompressed":
-				return estimateInMemorySizeUncompressed(0, 0, 0.0);
-			default:
-				throw new NotImplementedException("Case not implemented");
-		}
-	}
-
 	public static long estimateInMemorySizeGroup(int nrColumns) {
 		long size = 0;
 		size += 16; // Object header
@@ -64,60 +39,66 @@
 		return size;
 	}
 
-	public static long estimateInMemorySizeGroupValue(int nrColumns, long nrValues) {
+	public static long estimateInMemorySizeGroupValue(int nrColumns, int nrValues, boolean lossy) {
 		long size = estimateInMemorySizeGroup(nrColumns);
-		size += 24 //dictionary object
-			+ MemoryEstimates.doubleArrayCost(nrValues);
+		size += 8; // Dictionary Reference.
+		if(lossy){
+			size += QDictionary.getInMemorySize(nrValues);
+		}else{
+			size += Dictionary.getInMemorySize(nrValues);
+		}
 		return size;
 	}
 
-	public static long estimateInMemorySizeDDC(int nrCols, int uniqueVals) {
-		long size = estimateInMemorySizeGroupValue(nrCols, uniqueVals);
+	public static long estimateInMemorySizeDDC(int nrCols, int uniqueVals, boolean lossy) {
+		long size = estimateInMemorySizeGroupValue(nrCols, uniqueVals, lossy);
 		return size;
 	}
 
-	public static long estimateInMemorySizeDDC1(int nrCols, int uniqueVals, int dataLength) {
+	public static long estimateInMemorySizeDDC1(int nrCols, int uniqueVals, int dataLength, boolean lossy) {
 		if(uniqueVals > 255)
 			return Long.MAX_VALUE;
 		// LOG.debug("DD1C: " + nrCols + " nr unique: " + uniqueVals + " DataLength: " + dataLength);
-		long size = estimateInMemorySizeDDC(nrCols, uniqueVals);
+		long size = estimateInMemorySizeDDC(nrCols, uniqueVals, lossy);
 		size += MemoryEstimates.byteArrayCost(dataLength);
 		return size;
 	}
 
-	public static long estimateInMemorySizeDDC2(int nrCols, int uniqueVals, int dataLength) {
+	public static long estimateInMemorySizeDDC2(int nrCols, int uniqueVals, int dataLength, boolean lossy) {
 		if(uniqueVals > Character.MAX_VALUE)
 			return Long.MAX_VALUE;
 		// LOG.debug("DD2C: " + nrCols + "nr unique: " + uniqueVals +" datalen: "+ dataLength);
-		long size = estimateInMemorySizeDDC(nrCols, uniqueVals);
+		long size = estimateInMemorySizeDDC(nrCols, uniqueVals, lossy);
 		size += MemoryEstimates.charArrayCost(dataLength);
 		return size;
 	}
 
-	public static long estimateInMemorySizeOffset(int nrColumns, long nrValues, int pointers, int offsetLength) {
+	public static long estimateInMemorySizeOffset(int nrColumns, int nrValues, int pointers, int offsetLength, boolean lossy) {
 		// LOG.debug("OFFSET list: nrC " + nrColumns +"\tnrV " + nrValues + "\tpl "+pointers +"\tdl "+ offsetLength);
-		long size = estimateInMemorySizeGroupValue(nrColumns, nrValues);
+		long size = estimateInMemorySizeGroupValue(nrColumns, nrValues, lossy);
 		size += MemoryEstimates.intArrayCost(pointers);
 		size += MemoryEstimates.charArrayCost(offsetLength);
 		return size;
 	}
 
-	public static long estimateInMemorySizeOLE(int nrColumns, int nrValues, int offsetLength, int nrRows) {
+	public static long estimateInMemorySizeOLE(int nrColumns, int nrValues, int offsetLength, int nrRows, boolean lossy) {
 		nrColumns = nrColumns > 0 ? nrColumns : 1;
-		offsetLength += (nrRows / BitmapEncoder.BITMAP_BLOCK_SZ) * 2;
+		offsetLength += (nrRows / CompressionSettings.BITMAP_BLOCK_SZ) * 2;
 		long size = 0;
 		// LOG.debug("OLE cols: " + nrColumns + " vals: " + nrValues + " pointers: " + (nrValues / nrColumns + 1)
 		// + " offsetLength: " + (offsetLength) + " runs: " + nrValues / nrColumns);
-		size = estimateInMemorySizeOffset(nrColumns, nrValues, (nrValues / nrColumns) + 1, offsetLength);
-		size += MemoryEstimates.intArrayCost((int) nrValues / nrColumns);
+		size = estimateInMemorySizeOffset(nrColumns, nrValues, (nrValues / nrColumns) + 1, offsetLength, lossy);
+		if (nrRows > CompressionSettings.BITMAP_BLOCK_SZ * 2){
+			size += MemoryEstimates.intArrayCost((int) nrValues / nrColumns);
+		}
 		return size;
 	}
 
-	public static long estimateInMemorySizeRLE(int nrColumns, int nrValues, int nrRuns, int nrRows) {
+	public static long estimateInMemorySizeRLE(int nrColumns, int nrValues, int nrRuns, int nrRows, boolean lossy) {
 		nrColumns = nrColumns > 0 ? nrColumns : 1;
 		int offsetLength = (nrRuns) * 2;
 		// LOG.debug("\n\tRLE cols: " + nrColumns + " vals: " + nrValues + " offsetLength: " + offsetLength);
-		long size = estimateInMemorySizeOffset(nrColumns, nrValues, (nrValues / nrColumns) + 1, offsetLength);
+		long size = estimateInMemorySizeOffset(nrColumns, nrValues, (nrValues / nrColumns) + 1, offsetLength, lossy);
 
 		return size;
 	}
@@ -133,6 +114,9 @@
 
 	public static long estimateInMemorySizeQuan(int nrRows, int nrColumns){
 		long size = estimateInMemorySizeGroup(nrColumns);
+		if(nrRows < 0 || nrColumns < 0){
+			throw new DMLCompressionException("Invalid number of rows and columns");
+		}
 		size += 8; // scale value
 		size += MemoryEstimates.byteArrayCost(nrRows*nrColumns);
 		return size;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
index 00e1563..fb9ca41 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
@@ -41,21 +41,21 @@
 import org.apache.sysds.runtime.util.SortUtils;
 
 /**
- * Column group type for columns that are stored as dense arrays of doubles.
- * Uses a MatrixBlock internally to store the column contents.
+ * Column group type for columns that are stored as dense arrays of doubles. Uses a MatrixBlock internally to store the
+ * column contents.
  * 
  */
 public class ColGroupUncompressed extends ColGroup {
 	private static final long serialVersionUID = 4870546053280378891L;
 
 	/**
-	 * We store the contents of the columns as a MatrixBlock to take advantage of
-	 * high-performance routines available for this data structure.
+	 * We store the contents of the columns as a MatrixBlock to take advantage of high-performance routines available
+	 * for this data structure.
 	 */
 	private MatrixBlock _data;
 
 	public ColGroupUncompressed() {
-		super(new int[] {}, -1);
+		super();
 	}
 
 	public long getValuesSize() {
@@ -65,12 +65,11 @@
 	/**
 	 * Main constructor for Uncompressed ColGroup.
 	 * 
-	 * @param colIndicesList Indices (relative to the current block) of the columns
-	 *                       that this column group represents.
-	 * @param rawBlock       The uncompressed block; uncompressed data must be
-	 *                       present at the time that the constructor is called
-	 * @param compSettings   The Settings for how to compress this block, Here using
-	 *                       information about the raw block if it is transposed.
+	 * @param colIndicesList Indices (relative to the current block) of the columns that this column group represents.
+	 * @param rawBlock       The uncompressed block; uncompressed data must be present at the time that the constructor
+	 *                       is called
+	 * @param compSettings   The Settings for how to compress this block, Here using information about the raw block if
+	 *                       it is transposed.
 	 */
 	public ColGroupUncompressed(int[] colIndicesList, MatrixBlock rawBlock, CompressionSettings compSettings) {
 		super(colIndicesList, compSettings.transposeInput ? rawBlock.getNumColumns() : rawBlock.getNumRows());
@@ -82,14 +81,14 @@
 		_data = new MatrixBlock(numRows, _colIndexes.length, rawBlock.isInSparseFormat());
 
 		// ensure sorted col indices
-		if (!SortUtils.isSorted(0, _colIndexes.length, _colIndexes))
+		if(!SortUtils.isSorted(0, _colIndexes.length, _colIndexes))
 			Arrays.sort(_colIndexes);
 
 		// special cases empty blocks
-		if (rawBlock.isEmptyBlock(false))
+		if(rawBlock.isEmptyBlock(false))
 			return;
 		// special cases full block
-		if (!compSettings.transposeInput && _data.getNumColumns() == rawBlock.getNumColumns()) {
+		if(!compSettings.transposeInput && _data.getNumColumns() == rawBlock.getNumColumns()) {
 			_data.copy(rawBlock);
 			return;
 		}
@@ -97,27 +96,25 @@
 		// dense implementation for dense and sparse matrices to avoid linear search
 		int m = numRows;
 		int n = _colIndexes.length;
-		for (int i = 0; i < m; i++) {
-			for (int j = 0; j < n; j++) {
-				double val = compSettings.transposeInput ? rawBlock.quickGetValue(_colIndexes[j], i)
-						: rawBlock.quickGetValue(i, _colIndexes[j]);
+		for(int i = 0; i < m; i++) {
+			for(int j = 0; j < n; j++) {
+				double val = compSettings.transposeInput ? rawBlock.quickGetValue(_colIndexes[j], i) : rawBlock
+					.quickGetValue(i, _colIndexes[j]);
 				_data.appendValue(i, j, val);
 			}
 		}
 		_data.examSparsity();
 
 		// convert sparse MCSR to read-optimized CSR representation
-		if (_data.isInSparseFormat()) {
+		if(_data.isInSparseFormat()) {
 			_data = new MatrixBlock(_data, Type.CSR, false);
 		}
 	}
 
 	/**
-	 * Constructor for creating temporary decompressed versions of one or more
-	 * compressed column groups.
+	 * Constructor for creating temporary decompressed versions of one or more compressed column groups.
 	 * 
-	 * @param groupsToDecompress compressed columns to subsume. Must contain at
-	 *                           least one element.
+	 * @param groupsToDecompress compressed columns to subsume. Must contain at least one element.
 	 */
 	public ColGroupUncompressed(List<ColGroup> groupsToDecompress) {
 		super(mergeColIndices(groupsToDecompress), groupsToDecompress.get(0)._numRows);
@@ -125,21 +122,20 @@
 		// Invert the list of column indices
 		int maxColIndex = _colIndexes[_colIndexes.length - 1];
 		int[] colIndicesInverted = new int[maxColIndex + 1];
-		for (int i = 0; i < _colIndexes.length; i++) {
+		for(int i = 0; i < _colIndexes.length; i++) {
 			colIndicesInverted[_colIndexes[i]] = i;
 		}
 
 		// Create the buffer that holds the uncompressed data, packed together
 		_data = new MatrixBlock(_numRows, _colIndexes.length, false);
 
-		for (ColGroup colGroup : groupsToDecompress) {
+		for(ColGroup colGroup : groupsToDecompress) {
 			colGroup.decompressToBlock(_data, colIndicesInverted);
 		}
 	}
 
 	/**
-	 * Constructor for internal use. Used when a method needs to build an instance
-	 * of this class from scratch.
+	 * Constructor for internal use. Used when a method needs to build an instance of this class from scratch.
 	 * 
 	 * @param colIndices column mapping for this column group
 	 * @param numRows    number of rows in the column, for passing to the superclass
@@ -172,21 +168,20 @@
 	/**
 	 * Subroutine of constructor.
 	 * 
-	 * @param groupsToDecompress input to the constructor that decompresses into a
-	 *                           temporary UncompressedColGroup
+	 * @param groupsToDecompress input to the constructor that decompresses into a temporary UncompressedColGroup
 	 * @return a merged set of column indices across all those groups
 	 */
 	private static int[] mergeColIndices(List<ColGroup> groupsToDecompress) {
 		// Pass 1: Determine number of columns
 		int sz = 0;
-		for (ColGroup colGroup : groupsToDecompress) {
+		for(ColGroup colGroup : groupsToDecompress) {
 			sz += colGroup.getNumCols();
 		}
 
 		// Pass 2: Copy column offsets out
 		int[] ret = new int[sz];
 		int pos = 0;
-		for (ColGroup colGroup : groupsToDecompress) {
+		for(ColGroup colGroup : groupsToDecompress) {
 			int[] tmp = colGroup.getColIndices();
 			System.arraycopy(tmp, 0, ret, pos, tmp.length);
 			pos += tmp.length;
@@ -205,10 +200,10 @@
 	@Override
 	public void decompressToBlock(MatrixBlock target, int rl, int ru) {
 		// empty block, nothing to add to output
-		if (_data.isEmptyBlock(false))
+		if(_data.isEmptyBlock(false))
 			return;
-		for (int row = rl; row < ru; row++) {
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++) {
+		for(int row = rl; row < ru; row++) {
+			for(int colIx = 0; colIx < _colIndexes.length; colIx++) {
 				int col = _colIndexes[colIx];
 				double cellVal = _data.quickGetValue(row, colIx);
 				target.quickSetValue(row, col, cellVal);
@@ -219,12 +214,12 @@
 	@Override
 	public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) {
 		// empty block, nothing to add to output
-		if (_data.isEmptyBlock(false)) {
+		if(_data.isEmptyBlock(false)) {
 			return;
 		}
 		// Run through the rows, putting values into the appropriate locations
-		for (int row = 0; row < _data.getNumRows(); row++) {
-			for (int colIx = 0; colIx < _data.getNumColumns(); colIx++) {
+		for(int row = 0; row < _data.getNumRows(); row++) {
+			for(int colIx = 0; colIx < _data.getNumColumns(); colIx++) {
 				int origMatrixColIx = getColIndex(colIx);
 				int col = colIndexTargets[origMatrixColIx];
 				double cellVal = _data.quickGetValue(row, colIx);
@@ -236,11 +231,11 @@
 	@Override
 	public void decompressToBlock(MatrixBlock target, int colpos) {
 		// empty block, nothing to add to output
-		if (_data.isEmptyBlock(false)) {
+		if(_data.isEmptyBlock(false)) {
 			return;
 		}
 		// Run through the rows, putting values into the appropriate locations
-		for (int row = 0; row < _data.getNumRows(); row++) {
+		for(int row = 0; row < _data.getNumRows(); row++) {
 			double cellVal = _data.quickGetValue(row, colpos);
 			// Apparently rows are cols here.
 			target.quickSetValue(0, row, cellVal);
@@ -251,7 +246,7 @@
 	public double get(int r, int c) {
 		// find local column index
 		int ix = Arrays.binarySearch(_colIndexes, c);
-		if (ix < 0)
+		if(ix < 0)
 			throw new RuntimeException("Column index " + c + " not in uncompressed group.");
 
 		// uncompressed get value
@@ -266,7 +261,7 @@
 		MatrixBlock shortVector = new MatrixBlock(clen, 1, false);
 		shortVector.allocateDenseBlock();
 		double[] b = shortVector.getDenseBlockValues();
-		for (int colIx = 0; colIx < clen; colIx++)
+		for(int colIx = 0; colIx < clen; colIx++)
 			b[colIx] = vector.quickGetValue(_colIndexes[colIx], 0);
 		shortVector.recomputeNonZeros();
 
@@ -281,7 +276,7 @@
 		MatrixBlock shortVector = new MatrixBlock(clen, 1, false);
 		shortVector.allocateDenseBlock();
 		double[] b = shortVector.getDenseBlockValues();
-		for (int colIx = 0; colIx < clen; colIx++)
+		for(int colIx = 0; colIx < clen; colIx++)
 			b[colIx] = vector.quickGetValue(_colIndexes[colIx], 0);
 		shortVector.recomputeNonZeros();
 
@@ -295,27 +290,27 @@
 		LibMatrixMult.matrixMult(vector, _data, pret);
 
 		// copying partialResult to the proper indices of the result
-		if (!pret.isEmptyBlock(false)) {
+		if(!pret.isEmptyBlock(false)) {
 			double[] rsltArr = result.getDenseBlockValues();
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++)
+			for(int colIx = 0; colIx < _colIndexes.length; colIx++)
 				rsltArr[_colIndexes[colIx]] = pret.quickGetValue(0, colIx);
 			result.recomputeNonZeros();
 		}
 	}
 
-	@Override
-	public void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result) {
-		throw new NotImplementedException();
-	}
+	// @Override
+	// public void leftMultByRowVector(ColGroupDDC vector, MatrixBlock result) {
+	// 	throw new NotImplementedException();
+	// }
 
 	public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result, int k) {
 		MatrixBlock pret = new MatrixBlock(1, _colIndexes.length, false);
 		LibMatrixMult.matrixMult(vector, _data, pret, k);
 
 		// copying partialResult to the proper indices of the result
-		if (!pret.isEmptyBlock(false)) {
+		if(!pret.isEmptyBlock(false)) {
 			double[] rsltArr = result.getDenseBlockValues();
-			for (int colIx = 0; colIx < _colIndexes.length; colIx++)
+			for(int colIx = 0; colIx < _colIndexes.length; colIx++)
 				rsltArr[_colIndexes[colIx]] = pret.quickGetValue(0, colIx);
 			result.recomputeNonZeros();
 		}
@@ -335,14 +330,14 @@
 		LibMatrixAgg.aggregateUnaryMatrix(_data, ret, op);
 
 		// shift result into correct column indexes
-		if (op.indexFn instanceof ReduceRow) {
+		if(op.indexFn instanceof ReduceRow) {
 			// shift partial results, incl corrections
-			for (int i = _colIndexes.length - 1; i >= 0; i--) {
+			for(int i = _colIndexes.length - 1; i >= 0; i--) {
 				double val = ret.quickGetValue(0, i);
 				ret.quickSetValue(0, i, 0);
 				ret.quickSetValue(0, _colIndexes[i], val);
-				if (op.aggOp.existsCorrection())
-					for (int j = 1; j < ret.getNumRows(); j++) {
+				if(op.aggOp.existsCorrection())
+					for(int j = 1; j < ret.getNumRows(); j++) {
 						double corr = ret.quickGetValue(j, i);
 						ret.quickSetValue(j, i, 0);
 						ret.quickSetValue(j, _colIndexes[i], corr);
@@ -366,7 +361,7 @@
 		// read col indices
 		int numCols = _data.getNumColumns();
 		_colIndexes = new int[numCols];
-		for (int i = 0; i < numCols; i++)
+		for(int i = 0; i < numCols; i++)
 			_colIndexes[i] = in.readInt();
 	}
 
@@ -377,7 +372,7 @@
 
 		// write col indices
 		int len = _data.getNumColumns();
-		for (int i = 0; i < len; i++)
+		for(int i = 0; i < len; i++)
 			out.writeInt(_colIndexes[i]);
 	}
 
@@ -388,7 +383,7 @@
 
 	@Override
 	public void countNonZerosPerRow(int[] rnnz, int rl, int ru) {
-		for (int i = rl; i < ru; i++)
+		for(int i = rl; i < ru; i++)
 			rnnz[i - rl] += _data.recomputeNonZeros(i, i, 0, _data.getNumColumns() - 1);
 	}
 
@@ -424,7 +419,7 @@
 
 		@Override
 		public boolean hasNext() {
-			return (_rpos < _ru);
+			return(_rpos < _ru);
 		}
 
 		@Override
@@ -439,10 +434,11 @@
 				boolean nextRow = (_cpos + 1 >= getNumCols());
 				_rpos += nextRow ? 1 : 0;
 				_cpos = nextRow ? 0 : _cpos + 1;
-				if (_rpos >= _ru)
+				if(_rpos >= _ru)
 					return; // reached end
 				_value = _data.quickGetValue(_rpos, _cpos);
-			} while (!_inclZeros && _value == 0);
+			}
+			while(!_inclZeros && _value == 0);
 		}
 	}
 
@@ -454,21 +450,22 @@
 		@Override
 		public void next(double[] buff, int rowIx, int segIx, boolean last) {
 			// copy entire dense/sparse row
-			if (_data.isAllocated()) {
-				if (_data.isInSparseFormat()) {
-					if (!_data.getSparseBlock().isEmpty(rowIx)) {
+			if(_data.isAllocated()) {
+				if(_data.isInSparseFormat()) {
+					if(!_data.getSparseBlock().isEmpty(rowIx)) {
 						SparseBlock sblock = _data.getSparseBlock();
 						int apos = sblock.pos(rowIx);
 						int alen = sblock.size(rowIx);
 						int[] aix = sblock.indexes(rowIx);
 						double[] avals = sblock.values(rowIx);
-						for (int k = apos; k < apos + alen; k++)
+						for(int k = apos; k < apos + alen; k++)
 							buff[_colIndexes[aix[k]]] = avals[k];
 					}
-				} else {
+				}
+				else {
 					final int clen = getNumCols();
 					double[] a = _data.getDenseBlockValues();
-					for (int j = 0, aix = rowIx * clen; j < clen; j++)
+					for(int j = 0, aix = rowIx * clen; j < clen; j++)
 						buff[_colIndexes[j]] = a[aix + j];
 				}
 			}
@@ -497,13 +494,22 @@
 	@Override
 	public int[] getCounts() {
 		throw new DMLCompressionException(
-				"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
+			"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
 	}
 
 	@Override
-	public int[] getCounts(boolean includeZero) {
-		throw new DMLCompressionException(
-				"Invalid function call, the counts in Uncompressed Col Group is always 1 for each value");
+	public double[] getValues() {
+		if(_data.isInSparseFormat()) {
+			return _data.getSparseBlock().values(0);
+		}
+		else {
+			return _data.getDenseBlock().values(0);
+		}
+	}
+
+	@Override
+	public boolean isLossy() {
+		return false;
 	}
 
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
index 7edda8f..06e205f 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
@@ -19,12 +19,16 @@
 
 package org.apache.sysds.runtime.compress.colgroup;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.Arrays;
 
 import org.apache.sysds.runtime.DMLScriptException;
-import org.apache.sysds.runtime.compress.BitmapEncoder;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
+import org.apache.sysds.runtime.compress.utils.Bitmap;
+import org.apache.sysds.runtime.compress.utils.BitmapLossy;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
 import org.apache.sysds.runtime.functionobjects.KahanFunction;
@@ -33,7 +37,6 @@
 import org.apache.sysds.runtime.functionobjects.ReduceAll;
 import org.apache.sysds.runtime.functionobjects.ReduceCol;
 import org.apache.sysds.runtime.functionobjects.ReduceRow;
-import org.apache.sysds.runtime.instructions.cp.KahanObject;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.data.Pair;
 import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
@@ -56,7 +59,7 @@
 	};
 
 	/** Distinct values associated with individual bitmaps. */
-	protected Dictionary _dict;
+	protected IDictionary _dict;
 
 	public ColGroupValue() {
 		super();
@@ -68,17 +71,28 @@
 	 * @param colIndices indices (within the block) of the columns included in this column
 	 * @param numRows    total number of rows in the parent block
 	 * @param ubm        Uncompressed bitmap representation of the block
+	 * @param cs         The Compression settings used for compression
 	 */
-	public ColGroupValue(int[] colIndices, int numRows, UncompressedBitmap ubm) {
+	public ColGroupValue(int[] colIndices, int numRows, AbstractBitmap ubm, CompressionSettings cs) {
 		super(colIndices, numRows);
+		_lossy = false;
+		_zeros = ubm.containsZero();
 
 		// sort values by frequency, if requested
-		if(CompressionSettings.SORT_VALUES_BY_LENGTH && numRows > BitmapEncoder.BITMAP_BLOCK_SZ) {
+		if(cs.sortValuesByLength && numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
 			ubm.sortValuesByFrequency();
 		}
-
+		switch(ubm.getType()) {
+			case Full:
+				_dict = new Dictionary(((Bitmap) ubm).getValues());
+				break;
+			case Lossy:
+				_dict = new QDictionary((BitmapLossy) ubm);
+				_lossy = true;
+				break;
+		}
 		// extract and store distinct values (bitmaps handled by subclasses)
-		_dict = new Dictionary(ubm.getValues());
+		// _dict = new Dictionary(ubm.getValues());
 	}
 
 	/**
@@ -93,14 +107,9 @@
 		_dict = new Dictionary(values);
 	}
 
-	@Override
-	public long estimateInMemorySize() {
-		return ColGroupSizes.estimateInMemorySizeGroupValue(_colIndexes.length, getNumValues());
-	}
-
 	public long getDictionarySize() {
-		//NOTE: this estimate needs to be consistent with the estimate above,
-		//so for now we use the (incorrect) double array size, not the dictionary size
+		// NOTE: this estimate needs to be consistent with the estimate above,
+		// so for now we use the (incorrect) double array size, not the dictionary size
 		return (_dict != null) ? MemoryEstimates.doubleArrayCost(_dict.getValues().length) : 0;
 	}
 
@@ -110,7 +119,7 @@
 	 * @return the number of distinct sets of values associated with the bitmaps in this column group
 	 */
 	public int getNumValues() {
-		return _dict.getValues().length / _colIndexes.length;
+		return _dict.getNumberOfValues(_colIndexes.length);
 	}
 
 	public double[] getValues() {
@@ -124,17 +133,16 @@
 	public double getValue(int k, int col) {
 		return _dict.getValues()[k * getNumCols() + col];
 	}
-	
+
 	public void setDictionary(Dictionary dict) {
 		_dict = dict;
 	}
 
 	@Override
 	public MatrixBlock getValuesAsBlock() {
-		boolean containsZeros = (this instanceof ColGroupOffset) ? ((ColGroupOffset) this)._zeros : false;
 		final double[] values = getValues();
 		int vlen = values.length;
-		int rlen = containsZeros ? vlen + 1 : vlen;
+		int rlen = _zeros ? vlen + 1 : vlen;
 		MatrixBlock ret = new MatrixBlock(rlen, 1, false);
 		for(int i = 0; i < vlen; i++)
 			ret.quickSetValue(i, 0, values[i]);
@@ -143,7 +151,13 @@
 
 	public final int[] getCounts() {
 		int[] tmp = new int[getNumValues()];
-		return getCounts(tmp);
+		tmp = getCounts(tmp);
+		if(_zeros && this instanceof ColGroupOffset) {
+			tmp = Arrays.copyOf(tmp, tmp.length + 1);
+			int sum = Arrays.stream(tmp).sum();
+			tmp[tmp.length - 1] = getNumRows() - sum;
+		}
+		return tmp;
 	}
 
 	public abstract int[] getCounts(int[] out);
@@ -153,24 +167,12 @@
 		return getCounts(rl, ru, tmp);
 	}
 
-	public boolean getIfCountsType(){
+	public boolean getIfCountsType() {
 		return true;
 	}
 
 	public abstract int[] getCounts(int rl, int ru, int[] out);
 
-	public int[] getCounts(boolean inclZeros) {
-		int[] counts = getCounts();
-		if(inclZeros && this instanceof ColGroupOffset) {
-			counts = Arrays.copyOf(counts, counts.length + 1);
-			int sum = 0;
-			for(int i = 0; i < counts.length; i++)
-				sum += counts[i];
-			counts[counts.length - 1] = getNumRows() - sum;
-		}
-		return counts;
-	}
-
 	public MatrixBlock getCountsAsBlock() {
 		return getCountsAsBlock(getCounts());
 	}
@@ -183,37 +185,64 @@
 	}
 
 	protected int containsAllZeroValue() {
-		return _dict.hasZeroTuple(getNumCols());
+		return _dict.hasZeroTuple(_colIndexes.length);
 	}
 
-	protected final double[] sumAllValues(KahanFunction kplus, KahanObject kbuff) {
-		return sumAllValues(kplus, kbuff, true);
-	}
+	// protected final double[] sumAllValues(KahanFunction kplus, KahanObject kbuff) {
+	// return sumAllValues(kplus, kbuff, true);
+	// }
 
-	public final double sumValues(int valIx, KahanFunction kplus, KahanObject kbuff) {
-		final int numCols = getNumCols();
-		final int valOff = valIx * numCols;
-		final double[] values = _dict.getValues();
-		kbuff.set(0, 0);
-		for(int i = 0; i < numCols; i++)
-			kplus.execute2(kbuff, values[valOff + i]);
-		return kbuff._sum;
-	}
+	// protected final double[] sumAllValues(KahanFunction kplus, KahanObject kbuff, boolean allocNew) {
+	// // quick path: sum
+	// if(getNumCols() > 1 && _dict instanceof QDictionary && kplus instanceof KahanPlus){
+	// return sumAllValuesQToDouble();
+	// }
+	// else if(getNumCols() == 1 && kplus instanceof KahanPlus)
+	// return _dict.getValues(); // shallow copy of values
 
-	protected final double[] sumAllValues(KahanFunction kplus, KahanObject kbuff, boolean allocNew) {
-		// quick path: sum
-		if(getNumCols() == 1 && kplus instanceof KahanPlus)
-			return _dict.getValues(); // shallow copy of values
+	// // pre-aggregate value tuple
+	// final int numVals = getNumValues();
+	// double[] ret = allocNew ? new double[numVals] : allocDVector(numVals, false);
+	// for(int k = 0; k < numVals; k++)
+	// ret[k] = sumValues(k, kplus, kbuff);
 
-		// pre-aggregate value tuple
-		final int numVals = getNumValues();
-		double[] ret = allocNew ? new double[numVals] : allocDVector(numVals, false);
-		for(int k = 0; k < numVals; k++)
-			ret[k] = sumValues(k, kplus, kbuff);
+	// return ret;
+	// }
 
-		return ret;
-	}
+	// /**
+	// * Method for summing all value tuples in the dictionary.
+	// *
+	// * This method assumes two things
+	// *
+	// * 1. That you dont call it if the number of columns in this ColGroup is 1. (then use
+	// ((QDictionary)_dict)._values)
+	// * 2. That it is not used for anything else than KahnPlus.
+	// * @return an short array of the sum of each row in the quantized array.
+	// */
+	// protected final short[] sumAllValuesQ(){
+	// final byte[] values = ((QDictionary)_dict)._values;
+	// short[] res = new short[getNumValues()];
 
+	// for(int i = 0, off = 0; off< values.length; i++, off += _colIndexes.length){
+	// for( int j = 0 ; j < _colIndexes.length; j++){
+	// res[i] += values[off + j];
+	// }
+	// }
+	// return res;
+	// }
+
+	// protected static final double[] sumAllValuesQToDouble(QDictionary dict, int nrCol){
+	// final byte[] values = dict._values;
+	// double[] res = new double[dict.getNumberOfValues()];
+
+	// for(int i = 0, off = 0; off< values.length; i++, off += _colIndexes.length){
+	// for( int j = 0 ; j < _colIndexes.length; j++){
+	// res[i] += values[off + j];
+	// }
+	// res[i] = res[i] * dict._scale;
+	// }
+	// return res;
+	// }
 
 	protected final double sumValues(int valIx, double[] b) {
 		final int numCols = getNumCols();
@@ -242,18 +271,17 @@
 	 * 
 	 * @param result  output matrix block
 	 * @param builtin function object
-	 * @param zeros   indicator if column group contains zero values
 	 */
-	protected void computeMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
+	protected void computeMxx(MatrixBlock result, Builtin builtin) {
 		// init and 0-value handling
-		double val = (builtin.getBuiltinCode() == BuiltinCode.MAX) ?
-			Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
-		if(zeros)
+		double val = (builtin
+			.getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
+		if(_zeros)
 			val = builtin.execute(val, 0);
 
 		// iterate over all values only
 		val = _dict.aggregate(val, builtin);
-		
+
 		// compute new partial aggregate
 		val = builtin.execute(val, result.quickGetValue(0, 0));
 		result.quickSetValue(0, 0, val);
@@ -264,23 +292,22 @@
 	 * 
 	 * @param result  output matrix block
 	 * @param builtin function object
-	 * @param zeros   indicator if column group contains zero values
 	 */
-	protected void computeColMxx(MatrixBlock result, Builtin builtin, boolean zeros) {
+	protected void computeColMxx(MatrixBlock result, Builtin builtin) {
 		final int numCols = getNumCols();
 
 		// init and 0-value handling
 		double[] vals = new double[numCols];
-		Arrays.fill(vals, (builtin.getBuiltinCode() == BuiltinCode.MAX) ?
-			Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY);
-		if(zeros) {
-			for(int j = 0; j < numCols; j++)
-				vals[j] = builtin.execute(vals[j], 0);
+
+		// TODO fix edge cases in colMax. Since currently we rely on looking at rows in dict to specify if we start with
+		// zeros or not
+		if(!_zeros && _dict.getValuesLength() / numCols == getNumRows()) {
+			Arrays.fill(vals,
+				(builtin.getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY);
 		}
 
 		// iterate over all values only
 		vals = _dict.aggregateCols(vals, builtin, _colIndexes);
-		
 		// copy results to output
 		for(int j = 0; j < numCols; j++)
 			result.quickSetValue(0, _colIndexes[j], vals[j]);
@@ -297,12 +324,12 @@
 	}
 
 	protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols) {
-		double[] values = _dict.getValues(); //allocate new array just once
-		Dictionary tmp = new Dictionary(Arrays.copyOf(values, values.length+numCols));
+		double[] values = _dict.getValues(); // allocate new array just once
+		Dictionary tmp = new Dictionary(Arrays.copyOf(values, values.length + numCols));
 		double[] ret = tmp.apply(op).getValues();
 
 		// add new value to the end
-		Arrays.fill(ret, values.length, values.length+numCols, newVal);
+		Arrays.fill(ret, values.length, values.length + numCols, newVal);
 		return ret;
 	}
 
@@ -332,18 +359,18 @@
 			Builtin builtin = (Builtin) op.aggOp.increOp.fn;
 
 			if(op.indexFn instanceof ReduceAll)
-				computeMxx(result, builtin, _zeros);
+				computeMxx(result, builtin);
 			else if(op.indexFn instanceof ReduceCol)
 				computeRowMxx(result, builtin, rl, ru);
 			else if(op.indexFn instanceof ReduceRow)
-				computeColMxx(result, builtin, _zeros);
+				computeColMxx(result, builtin);
 		}
 		else {
 			throw new DMLScriptException("Unknown UnaryAggregate operator on CompressedMatrixBlock");
 		}
 	}
 
-	protected abstract void computeSum(MatrixBlock result, KahanFunction kplus );
+	protected abstract void computeSum(MatrixBlock result, KahanFunction kplus);
 
 	protected abstract void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru);
 
@@ -402,4 +429,56 @@
 		sb.append(Arrays.toString(_dict.getValues()));
 		return sb.toString();
 	}
+
+	@Override
+	public boolean isLossy() {
+		return _lossy;
+	}
+
+	@Override
+	public void readFields(DataInput in) throws IOException {
+		_numRows = in.readInt();
+		int numCols = in.readInt();
+		_zeros = in.readBoolean();
+		_lossy = in.readBoolean();
+
+		// read col indices
+		_colIndexes = new int[numCols];
+		for(int i = 0; i < numCols; i++)
+			_colIndexes[i] = in.readInt();
+
+		_dict = IDictionary.read(in, _lossy);
+
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		int numCols = getNumCols();
+		out.writeInt(_numRows);
+		out.writeInt(numCols);
+		out.writeBoolean(_zeros);
+		out.writeBoolean(_lossy);
+
+		// write col indices
+		for(int i = 0; i < _colIndexes.length; i++)
+			out.writeInt(_colIndexes[i]);
+
+		_dict.write(out);
+
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		long ret = 0; // header
+		ret += 4; // num rows int
+		ret += 4; // num cols int
+		ret += 1; // Zeros boolean
+		ret += 1; // lossy boolean
+		// col indices
+		ret += 4 * _colIndexes.length;
+		// distinct values (groups of values)
+		ret += _dict.getExactSizeOnDisk();
+		return ret;
+	}
+
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/DenseRowIterator.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/DenseRowIterator.java
index 5b593ea..3eb3bf6 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/DenseRowIterator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/DenseRowIterator.java
@@ -22,7 +22,7 @@
 import java.util.Arrays;
 import java.util.List;
 
-import org.apache.sysds.runtime.compress.BitmapEncoder;
+import org.apache.sysds.runtime.compress.CompressionSettings;
 
 public class DenseRowIterator extends RowIterator<double[]> {
 
@@ -36,7 +36,7 @@
 	@Override
 	public double[] next() {
 		// prepare meta data common across column groups
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int ix = _rpos % blksz;
 		final boolean last = (_rpos + 1 == _ru);
 		// copy group rows into consolidated row
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
index 09506d1..c6a2e53 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/Dictionary.java
@@ -19,39 +19,57 @@
 
 package org.apache.sysds.runtime.compress.colgroup;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
 import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.KahanFunction;
+import org.apache.sysds.runtime.functionobjects.KahanPlus;
+import org.apache.sysds.runtime.instructions.cp.KahanObject;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 import org.apache.sysds.utils.MemoryEstimates;
 
 /**
- * This dictionary class aims to encapsulate the storage and operations over
- * unique floating point values of a column group. The primary reason for its
- * introduction was to provide an entry point for specialization such as shared
+ * This dictionary class aims to encapsulate the storage and operations over unique floating point values of a column
+ * group. The primary reason for its introduction was to provide an entry point for specialization such as shared
  * dictionaries, which require additional information.
  */
-public class Dictionary {
-	// linearized <numcol vals> <numcol vals>
+public class Dictionary extends IDictionary {
+
+	// Linearized row major.
+	// v11 v12
+	// v21 v22
+	// ||
+	// \/
+	// v11 v12 v21 v22
 	protected final double[] _values;
-	
+
 	public Dictionary(double[] values) {
 		_values = values;
 	}
-	
+
 	public double[] getValues() {
 		return _values;
 	}
-	
+
 	public double getValue(int i) {
 		return _values[i];
 	}
-	
+
 	public long getInMemorySize() {
-		//object + values array
-		return 16 + MemoryEstimates.doubleArrayCost(_values.length);
+		// object + values array + double
+		return getInMemorySize(_values.length);
 	}
-	
+
+	public static long getInMemorySize(int valuesCount) {
+		// object + values array
+		return 16 + MemoryEstimates.doubleArrayCost(valuesCount);
+	}
+
 	public int hasZeroTuple(int ncol) {
-		int len = _values.length;
+		int len = _values.length / ncol;
 		for(int i = 0, off = 0; i < len; i++, off += ncol) {
 			boolean allZeros = true;
 			for(int j = 0; j < ncol; j++)
@@ -61,36 +79,85 @@
 		}
 		return -1;
 	}
-	
+
 	public double aggregate(double init, Builtin fn) {
-		//full aggregate can disregard tuple boundaries
+		// full aggregate can disregard tuple boundaries
 		int len = _values.length;
 		double ret = init;
 		for(int i = 0; i < len; i++)
 			ret = fn.execute(ret, _values[i]);
 		return ret;
 	}
-	
-	public double[] aggregateCols(double[] init, Builtin fn, int[] cols) {
-		int ncol = cols.length;
-		int vlen = _values.length / ncol;
-		double[] ret = init;
-		for(int k = 0; k < vlen; k++)
-			for(int j = 0, valOff = k * ncol; j < ncol; j++)
-				ret[j] = fn.execute(ret[j], _values[valOff + j]);
-		return ret;
-	}
-	
-	public Dictionary apply(ScalarOperator op) {
-		//in-place modification of the dictionary
+
+	public IDictionary apply(ScalarOperator op) {
+		// in-place modification of the dictionary
 		int len = _values.length;
 		for(int i = 0; i < len; i++)
 			_values[i] = op.executeScalar(_values[i]);
-		return this; //fluent API
+		return this; // fluent API
 	}
-	
+
 	@Override
-	public Dictionary clone() {
+	public IDictionary clone() {
 		return new Dictionary(_values.clone());
 	}
+
+	@Override
+	public int getValuesLength() {
+		return _values.length;
+	}
+
+	public static Dictionary read(DataInput in) throws IOException {
+		int numVals = in.readInt();
+		// read distinct values
+		double[] values = new double[numVals];
+		for(int i = 0; i < numVals; i++)
+			values[i] = in.readDouble();
+		return new Dictionary(values);
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeInt(_values.length);
+		for(int i = 0; i < _values.length; i++)
+			out.writeDouble(_values[i]);
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		return 4 + 8 * _values.length;
+	}
+
+	public static Dictionary materializeZeroValueFull(Dictionary OldDictionary, int numCols) {
+		return new Dictionary(Arrays.copyOf(OldDictionary._values, OldDictionary._values.length + numCols));
+	}
+
+	public int getNumberOfValues(int ncol) {
+		return _values.length / ncol;
+	}
+
+	@Override
+	protected double[] sumAllRowsToDouble(KahanFunction kplus, KahanObject kbuff, int nrColumns, boolean allocNew) {
+		if(nrColumns == 1 && kplus instanceof KahanPlus)
+			return getValues(); // shallow copy of values
+
+		// pre-aggregate value tuple
+		final int numVals = _values.length / nrColumns;
+		double[] ret = allocNew ? new double[numVals] : ColGroupValue.allocDVector(numVals, false);
+		for(int k = 0; k < numVals; k++) {
+			ret[k] = sumRow(k, kplus, kbuff, nrColumns);
+		}
+
+		return ret;
+	}
+
+	@Override
+	protected double sumRow(int k, KahanFunction kplus, KahanObject kbuff, int nrColumns) {
+		kbuff.set(0, 0);
+		int valOff = k * nrColumns;
+		for(int i = 0; i < nrColumns; i++)
+			kplus.execute2(kbuff, _values[valOff + i]);
+		return kbuff._sum;
+	}
+
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/IDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/IDictionary.java
new file mode 100644
index 0000000..72e577b
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/IDictionary.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.colgroup;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.KahanFunction;
+import org.apache.sysds.runtime.instructions.cp.KahanObject;
+import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+
+
+/**
+ * This dictionary class aims to encapsulate the storage and operations over unique floating point values of a column
+ * group. The primary reason for its introduction was to provide an entry point for specialization such as shared
+ * dictionaries, which require additional information.
+ */
+public abstract class IDictionary {
+
+	public abstract double[] getValues();
+
+	public abstract double getValue(int i);
+
+	public abstract int hasZeroTuple(int ncol);
+
+	public abstract long getInMemorySize();
+
+	public abstract double aggregate(double init, Builtin fn);
+
+	public abstract int getValuesLength();
+
+	public abstract IDictionary apply(ScalarOperator op);
+
+	public abstract IDictionary clone();
+
+	public double[] aggregateCols(double[] init, Builtin fn, int[] cols) {
+		int ncol = cols.length;
+		int vlen = getValuesLength() / ncol;
+		double[] ret = init;
+		for(int k = 0; k < vlen; k++)
+			for(int j = 0, valOff = k * ncol; j < ncol; j++)
+				ret[j] = fn.execute(ret[j], getValue(valOff + j));
+		return ret;
+	}
+
+	public static IDictionary read(DataInput in, boolean lossy) throws IOException {
+		return lossy ? QDictionary.read(in) : Dictionary.read(in);
+	}
+
+	public abstract void write(DataOutput out) throws IOException;
+
+	public abstract long getExactSizeOnDisk();
+
+	/**
+	 * Get the number of values given that the column group has n columns
+	 * @param ncol The number of Columns in the ColumnGroup.
+	 */
+	public abstract int getNumberOfValues(int ncol);
+
+	public static IDictionary materializeZeroValue(IDictionary OldDictionary, int numCols){
+		if(OldDictionary instanceof QDictionary){
+			return QDictionary.materializeZeroValueLossy((QDictionary)OldDictionary, numCols);
+		} else{
+			return Dictionary.materializeZeroValueFull((Dictionary)OldDictionary, numCols);
+		}
+	}
+
+	protected abstract double[] sumAllRowsToDouble(KahanFunction kplus, KahanObject kbuff, int nrColumns,  boolean allocNew);
+
+	protected abstract double sumRow(int k, KahanFunction kplus, KahanObject kbuff, int nrColumns);
+}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/QDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/QDictionary.java
new file mode 100644
index 0000000..34bc934
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/QDictionary.java
@@ -0,0 +1,217 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.colgroup;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.utils.BitmapLossy;
+import org.apache.sysds.runtime.functionobjects.Builtin;
+import org.apache.sysds.runtime.functionobjects.KahanFunction;
+import org.apache.sysds.runtime.functionobjects.KahanPlus;
+import org.apache.sysds.runtime.functionobjects.Multiply;
+import org.apache.sysds.runtime.instructions.cp.KahanObject;
+import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
+import org.apache.sysds.utils.MemoryEstimates;
+
+/**
+ * This dictionary class aims to encapsulate the storage and operations over unique floating point values of a column
+ * group. The primary reason for its introduction was to provide an entry point for specialization such as shared
+ * dictionaries, which require additional information.
+ */
+public class QDictionary extends IDictionary {
+
+	protected static final Log LOG = LogFactory.getLog(QDictionary.class.getName());
+	protected final double _scale;
+	protected final byte[] _values;
+
+	public QDictionary(BitmapLossy bm) {
+		_values = bm.getValues();
+		_scale = bm.getScale();
+	}
+
+	public QDictionary(byte[] values, double scale) {
+		_values = values;
+		_scale = scale;
+	}
+
+	public double[] getValues() {
+		LOG.warn("Decompressing Quantized Representation");
+		double[] res = new double[_values.length];
+		for(int i = 0; i < _values.length; i++) {
+			res[i] = _values[i] * _scale;
+		}
+		return res;
+	}
+
+	public double getValue(int i) {
+		return _values[i] * _scale;
+	}
+
+	public byte getValueByte(int i) {
+		return _values[i];
+	}
+
+	public double getScale() {
+		return _scale;
+	}
+
+	public long getInMemorySize() {
+		// object + values array + double
+		return getInMemorySize(_values.length);
+	}
+
+	public static long getInMemorySize(int valuesCount) {
+		// object + values array + double
+		return 16 + MemoryEstimates.byteArrayCost(valuesCount) + 8;
+	}
+
+	public int hasZeroTuple(int ncol) {
+		int len = _values.length / ncol;
+		for(int i = 0, off = 0; i < len; i++, off += ncol) {
+			boolean allZeros = true;
+			for(int j = 0; j < ncol; j++)
+				allZeros &= (_values[off + j] == 0);
+			if(allZeros)
+				return i;
+		}
+		return -1;
+	}
+
+	public double aggregate(double init, Builtin fn) {
+		// full aggregate can disregard tuple boundaries
+		int len = _values.length;
+		double ret = init;
+		for(int i = 0; i < len; i++)
+			ret = fn.execute(ret, getValue(i));
+		return ret;
+	}
+
+	public QDictionary apply(ScalarOperator op) {
+
+		if(op.fn instanceof Multiply) {
+			return new QDictionary(_values, op.executeScalar(_scale));
+		}
+		double[] temp = new double[_values.length];
+		double max = op.executeScalar((double) _values[0] * _scale);
+		temp[0] = max;
+		for(int i = 1; i < _values.length; i++) {
+			temp[i] = op.executeScalar((double) _values[i] * _scale);
+			double absTemp = Math.abs(temp[i]);
+			if(absTemp > max) {
+				max = absTemp;
+			}
+		}
+		byte[] newValues = new byte[_values.length];
+		double newScale = max / (double) (Byte.MAX_VALUE);
+		for(int i = 0; i < _values.length; i++) {
+			newValues[i] = (byte) ((double) temp[i] / newScale);
+		}
+
+		return new QDictionary(newValues, newScale);
+	}
+
+	@Override
+	public int getValuesLength() {
+		return _values.length;
+	}
+
+	@Override
+	public IDictionary clone() {
+		return new QDictionary(_values.clone(), _scale);
+	}
+
+	public static QDictionary read(DataInput in) throws IOException {
+		double scale = in.readDouble();
+		int numVals = in.readInt();
+		// read distinct values
+		byte[] values = new byte[numVals];
+		for(int i = 0; i < numVals; i++)
+			values[i] = in.readByte();
+		return new QDictionary(values, scale);
+	}
+
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeDouble(_scale);
+		out.writeInt(_values.length);
+		for(int i = 0; i < _values.length; i++)
+			out.writeByte(_values[i]);
+	}
+
+	@Override
+	public long getExactSizeOnDisk() {
+		return 8 + 4 + _values.length + 10000;
+	}
+
+	public static QDictionary materializeZeroValueLossy(QDictionary OldDictionary, int numCols) {
+		return new QDictionary(Arrays.copyOf(OldDictionary._values, OldDictionary._values.length + numCols),
+			OldDictionary._scale);
+	}
+
+	public int getNumberOfValues(int nCol) {
+		return _values.length / nCol;
+	}
+
+	public short[] sumAllRowsToShort(int nCol) {
+		short[] res = new short[getNumberOfValues(nCol)];
+		for(int i = 0, off = 0; off < _values.length; i++, off += nCol) {
+			for(int j = 0; j < nCol; j++) {
+				res[i] += _values[off + j];
+			}
+		}
+		return res;
+	}
+
+	@Override
+	protected double[] sumAllRowsToDouble(KahanFunction kplus, KahanObject kbuff, int nrColumns, boolean allocNew) {
+		if(nrColumns == 1 && kplus instanceof KahanPlus)
+			return getValues(); // shallow copy of values
+
+		final int numVals = _values.length / nrColumns;
+		double[] ret = allocNew ? new double[numVals] : ColGroupValue.allocDVector(numVals, false);
+		for(int k = 0; k < numVals; k++) {
+			ret[k] = sumRow(k, kplus, kbuff, nrColumns);
+		}
+
+		return ret;
+	}
+
+	@Override
+	protected double sumRow(int k, KahanFunction kplus, KahanObject kbuff, int nrColumns) {
+		int valOff = k * nrColumns;
+		if(kplus instanceof KahanPlus){
+			short res = 0;
+			for (int i = 0; i < nrColumns; i++){
+				res += _values[valOff + i];
+			}
+			return res * _scale;
+		} else{
+			kbuff.set(0, 0);
+			for(int i = 0; i < nrColumns; i++)
+				kplus.execute2(kbuff, _values[valOff + i] *_scale);
+			return kbuff._sum;
+		}
+	}
+}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/SparseRowIterator.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/SparseRowIterator.java
index ae88c6d..d623ce7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/SparseRowIterator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/SparseRowIterator.java
@@ -21,7 +21,7 @@
 
 import java.util.List;
 
-import org.apache.sysds.runtime.compress.BitmapEncoder;
+import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.data.SparseRow;
 import org.apache.sysds.runtime.data.SparseRowVector;
 
@@ -38,7 +38,7 @@
 	@Override
 	public SparseRow next() {
 		// prepare meta data common across column groups
-		final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ;
+		final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
 		final int ix = _rpos % blksz;
 		final boolean last = (_rpos + 1 == _ru);
 		// copy group rows into consolidated dense vector
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimationFactors.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimationFactors.java
deleted file mode 100644
index 7263a12..0000000
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimationFactors.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.runtime.compress.estim;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.compress.BitmapEncoder;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
-
-/**
- * Compressed Size Estimation factors. Contains meta information used to estimate the compression sizes of given columns
- * into given CompressionFormats
- */
-public class CompressedSizeEstimationFactors implements Comparable<CompressedSizeEstimationFactors> {
-	protected static final Log LOG = LogFactory.getLog(CompressedSizeEstimationFactors.class.getName());
-
-	protected final int numCols; // Number of columns in the compressed group
-	protected final int numVals; // Number of unique values in the compressed group
-	protected final int numOffs; // num OLE offsets
-	protected final int numRuns; // num RLE runs
-	protected final int numSingle; // num singletons
-	protected final int numRows;
-	protected final boolean containsZero;
-
-	protected CompressedSizeEstimationFactors(int numCols, int numVals, int numOffs, int numRuns, int numSingle,
-		int numRows, boolean containsZero) {
-		this.numCols = numCols;
-		this.numVals = numVals;
-		this.numOffs = numOffs;
-		this.numRuns = numRuns;
-		this.numSingle = numSingle;
-		this.numRows = numRows;
-		this.containsZero = containsZero;
-		LOG.debug(this);
-	}
-
-	protected static CompressedSizeEstimationFactors computeSizeEstimationFactors(UncompressedBitmap ubm,
-		boolean inclRLE, int numRows, int numCols) {
-
-		int numVals = ubm.getNumValues();
-
-		// TODO: fix the UncompressedBitmap to contain information of if the specific columns extracted
-		// contains zero values.
-		// This is still not contained in the list because default behavior is to ignore 0 values.
-		boolean containsZero = false;
-
-		int numRuns = 0;
-		int numOffs = 0;
-		int numSingle = 0;
-
-		LOG.debug("NumCols :" + numCols);
-
-		// compute size estimation factors
-		for(int i = 0; i < numVals; i++) {
-			int listSize = ubm.getNumOffsets(i);
-			numOffs += listSize;
-			numSingle += (listSize == 1) ? 1 : 0;
-			if(inclRLE) {
-				int[] list = ubm.getOffsetsList(i).extractValues();
-				int lastOff = -2;
-				numRuns += list[listSize - 1] / (BitmapEncoder.BITMAP_BLOCK_SZ - 1);
-				for(int j = 0; j < listSize; j++) {
-					if(list[j] != lastOff + 1) {
-						numRuns++;
-					}
-					lastOff = list[j];
-				}
-			}
-		}
-
-		return new CompressedSizeEstimationFactors(numCols, numVals * numCols, numOffs + numVals, numRuns, numSingle,
-			numRows, containsZero);
-	}
-
-	protected Iterable<Integer> fieldIterator() {
-		ArrayList<Integer> fields = new ArrayList<>();
-		fields.add(new Integer(numCols));
-		fields.add(numVals);
-		fields.add(numOffs);
-		fields.add(numRuns);
-		fields.add(numSingle);
-		fields.add(numRows);
-		fields.add(containsZero ? 1 : 0);
-		return fields;
-	}
-
-	public int compareTo(CompressedSizeEstimationFactors that) {
-		int diff = 0;
-		Iterator<Integer> thisF = this.fieldIterator().iterator();
-		Iterator<Integer> thatF = that.fieldIterator().iterator();
-
-		while(thisF.hasNext() && thatF.hasNext()) {
-			Integer thisV = thisF.next();
-			Integer thatV = thatF.next();
-
-			if(thisV == thatV) {
-				diff = diff << 1;
-			}
-			else if(thisV > thatV) {
-				diff = diff + 1 << 1;
-			}
-			else {
-				diff = diff - 1 << 1;
-			}
-		}
-		return diff;
-	}
-
-	@Override
-	public String toString() {
-		StringBuilder sb = new StringBuilder();
-		sb.append("\nrows:" + numRows);
-		sb.append("\tcols:" + numCols);
-		sb.append("\tnum Offsets:" + numOffs);
-		sb.append("\tnum Singles:" + numSingle);
-		sb.append("\tnum Runs:" + numRuns);
-		sb.append("\tnum Unique Vals:" + numVals);
-		sb.append("\tcontains a 0: " + containsZero);
-		return sb.toString();
-	}
-}
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
index 4f73ff8..509b340 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
@@ -20,7 +20,6 @@
 package org.apache.sysds.runtime.compress.estim;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.concurrent.Callable;
@@ -30,10 +29,10 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
+import org.apache.sysds.runtime.compress.colgroup.ColGroupSizes;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.util.CommonThreadPool;
 
@@ -68,15 +67,6 @@
 	}
 
 	/**
-	 * Single threaded version of extracting Compression Size info
-	 * 
-	 * @return The Compression Size info of each Column compressed isolated.
-	 */
-	public CompressedSizeInfo computeCompressedSizeInfos() {
-		return computeCompressedSizeInfos(1);
-	}
-
-	/**
 	 * Multi threaded version of extracting Compression Size info
 	 * 
 	 * @param k The concurrency degree.
@@ -87,64 +77,41 @@
 		return computeCompressedSizeInfos(sizeInfos);
 	}
 
+	/**
+	 * Extracts the CompressedSizeInfo for a list of ColGroups. The Compression Ratio is based on a Dense Uncompressed
+	 * Double Vector for each of the columns.
+	 * 
+	 * Internally it Loops through all the columns, and selects the best compression colGroup for that column. Even if
+	 * that is an UncompressedColGroup.
+	 * 
+	 * @param sizeInfos The size information of each of the Column Groups.
+	 * @return A CompressedSizeInfo object containing the information of the best column groups for individual columns.
+	 */
 	private CompressedSizeInfo computeCompressedSizeInfos(CompressedSizeInfoColGroup[] sizeInfos) {
 		List<Integer> colsC = new ArrayList<>();
 		List<Integer> colsUC = new ArrayList<>();
 		HashMap<Integer, Double> compRatios = new HashMap<>();
-		int nnzUC = 0;
+		// The size of an Uncompressed Dense ColGroup In the Column.
+		double unCompressedDenseSize = ColGroupSizes.estimateInMemorySizeUncompressed(_numCols, _numRows, 1.0);
+		int nnzUCSum = 0;
 
 		for(int col = 0; col < _numCols; col++) {
-			double uncompSize = sizeInfos[col].getCompressionSize(CompressionType.UNCOMPRESSED);
 			double minCompressedSize = (double) sizeInfos[col].getMinSize();
-			double compRatio = uncompSize / minCompressedSize;
-
-			if(compRatio > 1000) {
-				StringBuilder sb = new StringBuilder();
-				sb.append("Very good CompressionRatio: " +String.format("%10.1f", compRatio));
-				sb.append(" UncompressedSize: " + String.format("%14.0f",uncompSize));
-				sb.append(" tCompressedSize: " + String.format("%14.0f",minCompressedSize));
-				sb.append(" type: " + sizeInfos[col].getBestCompressionType());
-				LOG.warn(sb.toString());
+			double compRatio = unCompressedDenseSize / minCompressedSize;
+			compRatios.put(col, compRatio);
+			// If the best compression is achieved in an UnCompressed colGroup it is usually because it is a sparse
+			// ColGroup
+			if(sizeInfos[col].getBestCompressionType() == CompressionType.UNCOMPRESSED) {
+				colsUC.add(col);
+				nnzUCSum += sizeInfos[col].getEstNnz();
 			}
-
-			if(compRatio > 1) {
+			else {
 				colsC.add(col);
 				compRatios.put(col, compRatio);
 			}
-			else {
-				colsUC.add(col);
-				// TODO nnzUC not incrementing as intended outside this function.
-				nnzUC += sizeInfos[col].getEstNnz();
-			}
 		}
 
-		// correction of column classification (reevaluate dense estimates if necessary)
-		if(!MatrixBlock.evalSparseFormatInMemory(_numRows, colsUC.size(), nnzUC) && !colsUC.isEmpty()) {
-			for(int i = 0; i < colsUC.size(); i++) {
-				int col = colsUC.get(i);
-				double uncompSize = MatrixBlock.estimateSizeInMemory(_numRows, 1, 1.0);
-				// CompressedMatrixBlock.getUncompressedSize(numRows, 1, 1.0);
-				double compRatio = uncompSize / sizeInfos[col].getMinSize();
-				if(compRatio > 1) {
-					colsC.add(col);
-					colsUC.remove(i);
-					i--;
-					compRatios.put(col, compRatio);
-					nnzUC -= sizeInfos[col].getEstNnz();
-				}
-			}
-		}
-
-		if(LOG.isTraceEnabled()) {
-			LOG.trace("C: " + Arrays.toString(colsC.toArray(new Integer[0])));
-			LOG.trace(
-				"-- compression ratios: " + Arrays.toString(colsC.stream().map(c -> compRatios.get(c)).toArray()));
-			LOG.trace("UC: " + Arrays.toString(colsUC.toArray(new Integer[0])));
-			LOG.trace(
-				"-- compression ratios: " + Arrays.toString(colsUC.stream().map(c -> compRatios.get(c)).toArray()));
-		}
-
-		return new CompressedSizeInfo(sizeInfos, colsC, colsUC, compRatios, nnzUC);
+		return new CompressedSizeInfo(sizeInfos, colsC, colsUC, compRatios, nnzUCSum);
 
 	}
 
@@ -171,15 +138,15 @@
 	public abstract CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] colIndexes);
 
 	/**
-	 * Method used to extract the CompressedSizeEstimationFactors from an constructed UncompressedBitMap. Note this
+	 * Method used to extract the CompressedSizeEstimationFactors from an constructed UncompressedBitmap. Note this
 	 * method works both for the sample based estimator and the exact estimator, since the bitmap, can be extracted from
 	 * a sample or from the entire dataset.
 	 * 
-	 * @param ubm the UncompressedBitMap, either extracted from a sample or from the entier dataset
+	 * @param ubm the UncompressedBitmap, either extracted from a sample or from the entier dataset
 	 * @return The size factors estimated from the Bit Map.
 	 */
-	public CompressedSizeEstimationFactors estimateCompressedColGroupSize(UncompressedBitmap ubm) {
-		return CompressedSizeEstimationFactors.computeSizeEstimationFactors(ubm,
+	public EstimationFactors estimateCompressedColGroupSize(AbstractBitmap ubm) {
+		return EstimationFactors.computeSizeEstimationFactors(ubm,
 			_compSettings.validCompressions.contains(CompressionType.RLE),
 			_numRows,
 			ubm.getNumColumns());
@@ -210,7 +177,7 @@
 			return ret.toArray(new CompressedSizeInfoColGroup[0]);
 		}
 		catch(InterruptedException | ExecutionException e) {
-			throw new DMLRuntimeException(e);
+			return CompressedSizeInfoColGroup(clen);
 		}
 	}
 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
index 6911e69..3003936 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
@@ -21,7 +21,7 @@
 
 import org.apache.sysds.runtime.compress.BitmapEncoder;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 
 /**
@@ -35,8 +35,8 @@
 
 	@Override
 	public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] colIndexes) {
-		LOG.debug("CompressedSizeEstimatorExact: " + colIndexes.length);
-		UncompressedBitmap entireBitMap = BitmapEncoder.extractBitmap(colIndexes, _data, _compSettings);
-		return new CompressedSizeInfoColGroup(estimateCompressedColGroupSize(entireBitMap), _compSettings.validCompressions);
+		AbstractBitmap entireBitMap = BitmapEncoder.extractBitmap(colIndexes, _data, _compSettings);
+		return new CompressedSizeInfoColGroup(estimateCompressedColGroupSize(entireBitMap),
+			_compSettings.validCompressions);
 	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
index 8976c0d..5003a75 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
@@ -24,11 +24,10 @@
 
 public class CompressedSizeEstimatorFactory {
 
-	public static final boolean EXTRACT_SAMPLE_ONCE = true;
-
 	public static CompressedSizeEstimator getSizeEstimator(MatrixBlock data, CompressionSettings compSettings) {
 		long elements = compSettings.transposeInput ? data.getNumColumns() : data.getNumRows();
 		elements = data.getNonZeros() / (compSettings.transposeInput ? data.getNumRows() : data.getNumColumns());
+
 		return (compSettings.samplingRatio >= 1.0 || elements < 1000) ? new CompressedSizeEstimatorExact(data,
 			compSettings) : new CompressedSizeEstimatorSample(data, compSettings,
 				(int) Math.ceil(elements * compSettings.samplingRatio));
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
index 82c90f5..adbf086 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
@@ -19,23 +19,18 @@
 
 package org.apache.sysds.runtime.compress.estim;
 
-import java.util.Arrays;
 import java.util.HashMap;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.compress.BitmapEncoder;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
 import org.apache.sysds.runtime.compress.estim.sample.HassAndStokes;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap.BitmapType;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.util.UtilFunctions;
 
 public class CompressedSizeEstimatorSample extends CompressedSizeEstimator {
 
-	private static final Log LOG = LogFactory.getLog(CompressedSizeEstimatorSample.class.getName());
-
 	private int[] _sampleRows = null;
 	private HashMap<Integer, Double> _solveCache = null;
 
@@ -48,19 +43,14 @@
 	 */
 	public CompressedSizeEstimatorSample(MatrixBlock data, CompressionSettings compSettings, int sampleSize) {
 		super(data, compSettings);
-		// get sample of rows, incl eager extraction
-		if(_numRows < sampleSize) {
-			throw new DMLRuntimeException("SampleSize should always be less than number of rows");
-		}
 
 		_sampleRows = getSortedUniformSample(_numRows, sampleSize, _compSettings.seed);
 
-		if(CompressedSizeEstimatorFactory.EXTRACT_SAMPLE_ONCE) {
-			MatrixBlock select = new MatrixBlock(_numRows, 1, false);
-			for(int i = 0; i < sampleSize; i++)
-				select.quickSetValue(_sampleRows[i], 0, 1);
-			_data = _data.removeEmptyOperations(new MatrixBlock(), !_compSettings.transposeInput, true, select);
-		}
+		// Override the _data Matrix block with the sampled matrix block.
+		MatrixBlock select = new MatrixBlock(_numRows, 1, false);
+		for(int i = 0; i < sampleSize; i++)
+			select.quickSetValue(_sampleRows[i], 0, 1);
+		_data = _data.removeEmptyOperations(new MatrixBlock(), !_compSettings.transposeInput, true, select);
 
 		// establish estimator-local cache for numeric solve
 		_solveCache = new HashMap<>();
@@ -73,58 +63,49 @@
 		int[] sampleRows = _sampleRows;
 
 		// extract statistics from sample
-		UncompressedBitmap ubm = CompressedSizeEstimatorFactory.EXTRACT_SAMPLE_ONCE ? BitmapEncoder
-			.extractBitmap(colIndexes, _data, _compSettings) : BitmapEncoder
-				.extractBitmapFromSample(colIndexes, _data, sampleRows, _compSettings);
-		CompressedSizeEstimationFactors fact = CompressedSizeEstimationFactors
-			.computeSizeEstimationFactors(ubm, false, _numRows, numCols);
+		AbstractBitmap ubm = BitmapEncoder.extractBitmap(colIndexes, _data, _compSettings);
+		EstimationFactors fact = EstimationFactors.computeSizeEstimationFactors(ubm, false, _numRows, numCols);
 
 		// estimate number of distinct values (incl fixes for anomalies w/ large sample fraction)
+		// TODO Replace this with lib matrix/data/LibMatrixCountDistinct
 		int totalCardinality = getNumDistinctValues(ubm, _numRows, sampleRows, _solveCache);
 		totalCardinality = Math.max(totalCardinality, fact.numVals);
+		totalCardinality =  _compSettings.lossy ? Math.min(totalCardinality, numCols * 127) : totalCardinality;
 		totalCardinality = Math.min(totalCardinality, _numRows);
 
-		// estimate unseen values
-		int unseenVals = totalCardinality - fact.numVals;
+		// Number of unseen values
+		// int unseenVals = totalCardinality - fact.numVals;
+
+		// Note this numZeros is the count of rows that are all zero.
+		int numZeros = ubm.getZeroCounts();
 
 		// estimate number of non-zeros (conservatively round up)
 		double C = Math.max(1 - (double) fact.numSingle / sampleSize, (double) sampleSize / _numRows);
-		int numZeros = sampleSize - fact.numOffs; // >=0
+
 		int numNonZeros = (int) Math.ceil(_numRows - (double) _numRows / sampleSize * C * numZeros);
 		numNonZeros = Math.max(numNonZeros, totalCardinality); // handle anomaly of zi=0
 
-		if(totalCardinality <= 0 || unseenVals < 0 || numZeros < 0 || numNonZeros <= 0)
-			LOG.warn("Invalid estimates detected for " + Arrays.toString(colIndexes) + ": " + totalCardinality + " "
-				+ unseenVals + " " + numZeros + " " + numNonZeros);
-
 		// estimate number of segments and number of runs incl correction for
 		// empty segments and empty runs (via expected mean of offset value)
 		// int numUnseenSeg = (int) (unseenVals * Math.ceil((double) _numRows / BitmapEncoder.BITMAP_BLOCK_SZ / 2));
-		int totalNumRuns = getNumRuns(ubm, sampleSize, _numRows, sampleRows);
+		int totalNumRuns = ubm.getNumValues() > 0 ? getNumRuns(ubm, sampleSize, _numRows, sampleRows) : 0;
 
-		// TODO. Make it possible to detect if the values contains a 0.
-		// Same case as in the Exact estimator, there is no way of knowing currently if a specific column or row
-		// contains
-		// a 0.
-		boolean containsZero = false;
+		boolean containsZero = numZeros > 0;
 
-		CompressedSizeEstimationFactors totalFacts = new CompressedSizeEstimationFactors(numCols, totalCardinality,
-			numNonZeros, totalNumRuns, fact.numSingle, _numRows, containsZero);
+		EstimationFactors totalFacts = new EstimationFactors(numCols, totalCardinality, numNonZeros, totalNumRuns,
+			fact.numSingle, _numRows, containsZero, ubm.getType() == BitmapType.Lossy);
 
 		// construct new size info summary
 		return new CompressedSizeInfoColGroup(totalFacts, _compSettings.validCompressions);
 	}
 
-	private static int getNumDistinctValues(UncompressedBitmap ubm, int numRows, int[] sampleRows,
+	private static int getNumDistinctValues(AbstractBitmap ubm, int numRows, int[] sampleRows,
 		HashMap<Integer, Double> solveCache) {
 		return HassAndStokes.haasAndStokes(ubm, numRows, sampleRows.length, solveCache);
 	}
 
-	private static int getNumRuns(UncompressedBitmap ubm, int sampleSize, int totalNumRows, int[] sampleRows) {
+	private static int getNumRuns(AbstractBitmap ubm, int sampleSize, int totalNumRows, int[] sampleRows) {
 		int numVals = ubm.getNumValues();
-		// all values in the sample are zeros
-		if(numVals == 0)
-			return 0;
 		double numRuns = 0;
 		for(int vi = 0; vi < numVals; vi++) {
 			int[] offsets = ubm.getOffsetsList(vi).extractValues();
@@ -289,8 +270,6 @@
 	 * @return sorted array of integers
 	 */
 	private static int[] getSortedUniformSample(int range, int smplSize, long seed) {
-		if(smplSize == 0)
-			throw new DMLRuntimeException("Sample Size of 0 is invalid");
 		return UtilFunctions.getSortedSampleIndexes(range, smplSize, seed);
 	}
 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
index 7090ff8..2ba2f3e 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
@@ -20,8 +20,8 @@
 package org.apache.sysds.runtime.compress.estim;
 
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
@@ -38,8 +38,7 @@
 	private final CompressionType _bestCompressionType;
 	private final Map<CompressionType, Long> _sizes;
 
-	public CompressedSizeInfoColGroup(CompressedSizeEstimationFactors fact,
-		List<CompressionType> validCompressionTypes) {
+	public CompressedSizeInfoColGroup(EstimationFactors fact, Set<CompressionType> validCompressionTypes) {
 		_numVals = fact.numVals;
 		_numOffs = fact.numOffs;
 		_sizes = calculateCompressionSizes(fact, validCompressionTypes);
@@ -86,8 +85,8 @@
 		return _numOffs;
 	}
 
-	private static Map<CompressionType, Long> calculateCompressionSizes(CompressedSizeEstimationFactors fact,
-		List<CompressionType> validCompressionTypes) {
+	private static Map<CompressionType, Long> calculateCompressionSizes(EstimationFactors fact,
+		Set<CompressionType> validCompressionTypes) {
 		Map<CompressionType, Long> res = new HashMap<>();
 		for(CompressionType ct : validCompressionTypes) {
 			res.put(ct, getCompressionSize(ct, fact));
@@ -95,26 +94,30 @@
 		return res;
 	}
 
-	private static Long getCompressionSize(CompressionType ct, CompressedSizeEstimationFactors fact) {
+	private static Long getCompressionSize(CompressionType ct, EstimationFactors fact) {
 		long size = 0;
 		switch(ct) {
 			case DDC:
 				if(fact.numVals < 256) {
 					size = ColGroupSizes.estimateInMemorySizeDDC1(fact.numCols,
 						fact.numVals + (fact.containsZero ? 1 : 0),
-						fact.numRows);
+						fact.numRows,
+						fact.lossy);
 				}
 				else {
 					size = ColGroupSizes.estimateInMemorySizeDDC2(fact.numCols,
 						fact.numVals + (fact.containsZero ? 1 : 0),
-						fact.numRows);
+						fact.numRows,
+						fact.lossy);
 				}
 				break;
 			case RLE:
-				size = ColGroupSizes.estimateInMemorySizeRLE(fact.numCols, fact.numVals, fact.numRuns, fact.numRows);
+				size = ColGroupSizes
+					.estimateInMemorySizeRLE(fact.numCols, fact.numVals, fact.numRuns, fact.numRows, fact.lossy);
 				break;
 			case OLE:
-				size = ColGroupSizes.estimateInMemorySizeOLE(fact.numCols, fact.numVals, fact.numOffs, fact.numRows);
+				size = ColGroupSizes
+					.estimateInMemorySizeOLE(fact.numCols, fact.numVals, fact.numOffs, fact.numRows, fact.lossy);
 				break;
 			case UNCOMPRESSED:
 				size = ColGroupSizes.estimateInMemorySizeUncompressed(fact.numRows,
@@ -122,7 +125,7 @@
 					((double) fact.numVals / (fact.numRows * fact.numCols)));
 				break;
 			case QUAN:
-				size = ColGroupSizes.estimateInMemorySizeQuan(fact.numRows, fact.numCols); 
+				size = ColGroupSizes.estimateInMemorySizeQuan(fact.numRows, fact.numCols);
 				break;
 			default:
 				throw new NotImplementedException("The col compression Type is not yet supported");
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
new file mode 100644
index 0000000..c5db40c
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.estim;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap.BitmapType;
+
+/**
+ * Compressed Size Estimation factors. Contains meta information used to estimate the compression sizes of given columns
+ * into given CompressionFormats
+ */
+public class EstimationFactors {
+
+	protected static final Log LOG = LogFactory.getLog(EstimationFactors.class.getName());
+
+	protected final int numCols; // Number of columns in the compressed group
+	// TODO Make a variable called numDistinct to use for DDC.
+	/** Number of distinct value tuples in the columns, not to be confused with number of distinct values */
+	protected final int numVals; // Number of unique values in the compressed group
+	/** The number of offsets, to tuples of values in the column groups */
+	protected final int numOffs;
+	/** The Number of runs, of consecutive equal numbers, used primarily in RLE*/
+	protected final int numRuns;
+	/** The Number of Values in the collection not Zero , Also refered to as singletons */
+	protected final int numSingle;
+	protected final int numRows;
+	protected final boolean containsZero;
+	protected final boolean lossy;
+
+	protected EstimationFactors(int numCols, int numVals, int numOffs, int numRuns, int numSingle, int numRows,
+		boolean containsZero, boolean lossy) {
+		this.numCols = numCols;
+		this.numVals = numVals;
+		this.numOffs = numOffs;
+		this.numRuns = numRuns;
+		this.numSingle = numSingle;
+		this.numRows = numRows;
+		this.containsZero = containsZero;
+		this.lossy = lossy;
+		LOG.debug(this);
+	}
+
+	protected static EstimationFactors computeSizeEstimationFactors(AbstractBitmap ubm, boolean inclRLE, int numRows,
+		int numCols) {
+		int numVals = ubm.getNumValues();
+		boolean containsZero = ubm.containsZero();
+		
+		int numRuns = 0;
+		int numOffs = 0;
+		int numSingle = 0;
+
+		LOG.debug("NumCols :" + numCols);
+
+		// compute size estimation factors
+		for(int i = 0; i < numVals; i++) {
+			int listSize = ubm.getNumOffsets(i);
+			numOffs += listSize;
+			numSingle += (listSize == 1) ? 1 : 0;
+			if(inclRLE) {
+				int[] list = ubm.getOffsetsList(i).extractValues();
+				int lastOff = -2;
+				numRuns += list[listSize - 1] / (CompressionSettings.BITMAP_BLOCK_SZ- 1);
+				for(int j = 0; j < listSize; j++) {
+					if(list[j] != lastOff + 1) {
+						numRuns++;
+					}
+					lastOff = list[j];
+				}
+			}
+		}
+
+		return new EstimationFactors(numCols, numVals * numCols, numOffs + numVals, numRuns, numSingle, numRows,
+			containsZero, ubm.getType() == BitmapType.Lossy);
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append("\nrows:" + numRows);
+		sb.append("\tcols:" + numCols);
+		sb.append("\tnum Offsets:" + numOffs);
+		sb.append("\tnum Singles:" + numSingle);
+		sb.append("\tnum Runs:" + numRuns);
+		sb.append("\tnum Unique Vals:" + numVals);
+		sb.append("\tcontains a 0: " + containsZero);
+		return sb.toString();
+	}
+}
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/FrequencyCount.java b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/FrequencyCount.java
index 6685f11..3568683 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/FrequencyCount.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/FrequencyCount.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysds.runtime.compress.estim.sample;
 
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 
 public class FrequencyCount {
 
@@ -30,7 +30,7 @@
 	 * @param ubm uncompressed bitmap
 	 * @return frequency counts
 	 */
-	protected static int[] get(UncompressedBitmap ubm) {
+	protected static int[] get(AbstractBitmap ubm) {
 		// determine max frequency
 		int numVals = ubm.getNumValues();
 		int maxCount = 0;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/HassAndStokes.java b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/HassAndStokes.java
index 785f277..ff33809 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/HassAndStokes.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/HassAndStokes.java
@@ -23,7 +23,7 @@
 
 import org.apache.commons.math3.analysis.UnivariateFunction;
 import org.apache.commons.math3.analysis.solvers.UnivariateSolverUtils;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 
 public class HassAndStokes {
 
@@ -46,7 +46,7 @@
 	 * @param solveCache A Hashmap containing information for getDuj2aEstimate
 	 * @return An estimation of distinct elements in the population.
 	 */
-	public static int haasAndStokes(UncompressedBitmap ubm, int nRows, int sampleSize,
+	public static int haasAndStokes(AbstractBitmap ubm, int nRows, int sampleSize,
 		HashMap<Integer, Double> solveCache) {
 		// obtain value and frequency histograms
 		int numVals = ubm.getNumValues();
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
index 3e9962c..5fd9e16 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysds.runtime.compress.estim.sample;
 
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.Bitmap;
 
 public class ShlosserEstimator {
 
@@ -32,7 +32,7 @@
 	 * @param sampleSize The number of rows in the sample
 	 * @return an estimation of number of distinct values.
 	 */
-	public static int get(UncompressedBitmap ubm, int nRows, int sampleSize) {
+	public static int get(Bitmap ubm, int nRows, int sampleSize) {
 		double q = ((double) sampleSize) / nRows;
 		double oneMinusQ = 1 - q;
 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserJackknifeEstimator.java b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserJackknifeEstimator.java
index 7c04638..7ccffe8 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserJackknifeEstimator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserJackknifeEstimator.java
@@ -20,7 +20,7 @@
 package org.apache.sysds.runtime.compress.estim.sample;
 
 import org.apache.commons.math3.distribution.ChiSquaredDistribution;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.Bitmap;
 
 public class ShlosserJackknifeEstimator {
 
@@ -36,7 +36,7 @@
 	 * @return an estimation of number of distinct values.
 	 */
 	@SuppressWarnings("unused")
-	private static int shlosserJackknifeEstimator(UncompressedBitmap ubm, int nRows, int sampleSize) {
+	private static int shlosserJackknifeEstimator(Bitmap ubm, int nRows, int sampleSize) {
 		int numVals = ubm.getNumValues();
 		CriticalValue cv = computeCriticalValue(sampleSize);
 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SmoothedJackknifeEstimator.java b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SmoothedJackknifeEstimator.java
index 6282eb0..b5536e2 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SmoothedJackknifeEstimator.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SmoothedJackknifeEstimator.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysds.runtime.compress.estim.sample;
 
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
+import org.apache.sysds.runtime.compress.utils.Bitmap;
 
 public class SmoothedJackknifeEstimator {
 
@@ -32,7 +32,7 @@
 	 * @param sampleSize The number of rows in the sample
 	 * @return Estimate of the number of distinct values
 	 */
-	public static int get(UncompressedBitmap ubm, int nRows, int sampleSize) {
+	public static int get(Bitmap ubm, int nRows, int sampleSize) {
 		int numVals = ubm.getNumValues();
 		int[] freqCounts = FrequencyCount.get(ubm);
 		// all values in the sample are zeros
diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/AbstractBitmap.java b/src/main/java/org/apache/sysds/runtime/compress/utils/AbstractBitmap.java
new file mode 100644
index 0000000..c7cc8ee
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/AbstractBitmap.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.utils;
+
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+public abstract class AbstractBitmap {
+    protected static final Log LOG = LogFactory.getLog(AbstractBitmap.class.getName());
+
+	public enum BitmapType {
+		Lossy,
+		Full
+	}
+    protected final int _numCols;
+    
+    /** Bitmaps (as lists of offsets) for each of the values. */
+    protected IntArrayList[] _offsetsLists;
+
+    /** int specifying the number of zero value groups contained in the rows. */
+	protected final int _numZeros;
+
+    public AbstractBitmap(int numCols, IntArrayList[] offsetsLists, int numZeroGroups){
+		_numCols = numCols;
+        _numZeros = numZeroGroups;
+        _offsetsLists = offsetsLists;
+    }
+
+	public int getNumColumns() {
+		return _numCols;
+    }
+    
+    /**
+	 * Obtain number of distinct value groups in the column. this number is also the number of bitmaps, since there is
+	 * one bitmap per value
+	 * 
+	 * @return number of distinct value groups in the column;
+	 */
+	public abstract int getNumValues();
+    
+    
+    public IntArrayList[] getOffsetList() {
+		return _offsetsLists;
+    }
+	public IntArrayList getOffsetsList(int idx) {
+		return _offsetsLists[idx];
+    }
+    
+    public long getNumOffsets() {
+		long ret = 0;
+		for(IntArrayList offlist : _offsetsLists)
+			ret += offlist.size();
+		return ret;
+	}
+
+	public int getNumOffsets(int ix) {
+		return _offsetsLists[ix].size();
+    }
+    
+
+    public abstract void sortValuesByFrequency();
+
+    public boolean containsZero() {
+		return _numZeros > 0;
+    }
+    
+    public int getZeroCounts() {
+		return _numZeros;
+    }
+	
+	public abstract BitmapType getType();
+
+    @Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(super.toString());
+		sb.append("\nzeros:  " + _numZeros);
+		sb.append("\ncolumns:" + _numCols);
+		sb.append("\nOffsets:" + Arrays.toString(_offsetsLists));
+		return sb.toString();
+	}
+}
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/compress/UncompressedBitmap.java b/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java
similarity index 64%
rename from src/main/java/org/apache/sysds/runtime/compress/UncompressedBitmap.java
rename to src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java
index 0b1aa8f..2aba804 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/UncompressedBitmap.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java
@@ -17,61 +17,57 @@
  * under the License.
  */
 
-package org.apache.sysds.runtime.compress;
+package org.apache.sysds.runtime.compress.utils;
 
 import java.util.Arrays;
 
 import org.apache.commons.lang.ArrayUtils;
-import org.apache.sysds.runtime.compress.utils.DblArrayIntListHashMap;
-import org.apache.sysds.runtime.compress.utils.DoubleIntListHashMap;
 import org.apache.sysds.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry;
 import org.apache.sysds.runtime.compress.utils.DoubleIntListHashMap.DIListEntry;
-import org.apache.sysds.runtime.compress.utils.IntArrayList;
 import org.apache.sysds.runtime.util.SortUtils;
 
 /**
  * Uncompressed representation of one or more columns in bitmap format.
  */
-public final class UncompressedBitmap {
-	
-	private final int _numCols;
+public final class Bitmap extends AbstractBitmap {
 
-	/** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>. */
+	/**
+	 * Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.
+	 */
 	private double[] _values;
 
-	/** Bitmaps (as lists of offsets) for each of the values. */
-	private IntArrayList[] _offsetsLists;
+	public Bitmap(int numCols, IntArrayList[] offsetsLists, int numZeroGroups, double[] values) {
+		super(numCols, offsetsLists, numZeroGroups);
+		_values = values;
+	}
 
-	public UncompressedBitmap(DblArrayIntListHashMap distinctVals, int numColumns) {
+	public static Bitmap makeBitmap(DblArrayIntListHashMap distinctVals, int numColumns, int numZeros) {
 		// added for one pass bitmap construction
 		// Convert inputs to arrays
 		int numVals = distinctVals.size();
-		_values = new double[numVals * numColumns];
-		_offsetsLists = new IntArrayList[numVals];
+		int numCols = numColumns;
+		double[] values = new double[numVals * numCols];
+		IntArrayList[] offsetsLists = new IntArrayList[numVals];
 		int bitmapIx = 0;
 		for(DArrayIListEntry val : distinctVals.extractValues()) {
-			System.arraycopy(val.key.getData(), 0, _values, bitmapIx * numColumns, numColumns);
-			_offsetsLists[bitmapIx++] = val.value;
+			System.arraycopy(val.key.getData(), 0, values, bitmapIx * numCols, numCols);
+			offsetsLists[bitmapIx++] = val.value;
 		}
-		_numCols = numColumns;
+		return new Bitmap(numCols, offsetsLists, numZeros, values);
 	}
 
-	public UncompressedBitmap(DoubleIntListHashMap distinctVals) {
+	public static Bitmap makeBitmap(DoubleIntListHashMap distinctVals, int numZeros) {
 		// added for one pass bitmap construction
 		// Convert inputs to arrays
 		int numVals = distinctVals.size();
-		_values = new double[numVals];
-		_offsetsLists = new IntArrayList[numVals];
+		double[] values = new double[numVals];
+		IntArrayList[] offsetsLists = new IntArrayList[numVals];
 		int bitmapIx = 0;
 		for(DIListEntry val : distinctVals.extractValues()) {
-			_values[bitmapIx] = val.key;
-			_offsetsLists[bitmapIx++] = val.value;
+			values[bitmapIx] = val.key;
+			offsetsLists[bitmapIx++] = val.value;
 		}
-		_numCols = 1;
-	}
-
-	public int getNumColumns() {
-		return _numCols;
+		return new Bitmap(1, offsetsLists, numZeros, values);
 	}
 
 	/**
@@ -93,31 +89,10 @@
 		return Arrays.copyOfRange(_values, ix * _numCols, (ix + 1) * _numCols);
 	}
 
-	/**
-	 * Obtain number of distinct values in the column.
-	 * 
-	 * @return number of distinct values in the column; this number is also the number of bitmaps, since there is one
-	 *         bitmap per value
-	 */
 	public int getNumValues() {
 		return _values.length / _numCols;
 	}
 
-	public IntArrayList getOffsetsList(int ix) {
-		return _offsetsLists[ix];
-	}
-
-	public long getNumOffsets() {
-		long ret = 0;
-		for(IntArrayList offlist : _offsetsLists)
-			ret += offlist.size();
-		return ret;
-	}
-
-	public int getNumOffsets(int ix) {
-		return _offsetsLists[ix].size();
-	}
-
 	public void sortValuesByFrequency() {
 		int numVals = getNumValues();
 		int numCols = getNumColumns();
@@ -145,4 +120,17 @@
 		_values = lvalues;
 		_offsetsLists = loffsets;
 	}
+
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(super.toString());
+		sb.append("\nValues: " + Arrays.toString(_values));
+		return sb.toString();
+	}
+
+	@Override
+	public BitmapType getType() {
+		return BitmapType.Full;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/BitmapLossy.java b/src/main/java/org/apache/sysds/runtime/compress/utils/BitmapLossy.java
new file mode 100644
index 0000000..9037c00
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/BitmapLossy.java
@@ -0,0 +1,305 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.compress.utils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.DoubleSummaryStatistics;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+
+import org.apache.commons.lang.NotImplementedException;
+
+/**
+ * Uncompressed but Quantized representation of contained data.
+ */
+public final class BitmapLossy extends AbstractBitmap {
+
+	/**
+	 * Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.
+	 */
+	private final byte[] _values;
+	private final double _scale;
+
+	public BitmapLossy(int numCols, IntArrayList[] offsetsLists, int numZeroGroups, byte[] values, double scale) {
+		super(numCols, offsetsLists, numZeroGroups);
+		_values = values;
+		_scale = scale;
+	}
+
+	public static AbstractBitmap makeBitmapLossy(Bitmap ubm) {
+		int numCols = ubm.getNumColumns();
+		double[] fp = ubm.getValues();
+		double scale = getScale(fp);
+		if(Double.isNaN(scale)) {
+			LOG.warn("Defaulting to incompressable colGroup");
+			return ubm;
+		}
+		else {
+			byte[] scaledValues = scaleValues(fp, scale);
+			if(numCols == 1) {
+				return makeBitmapLossySingleCol(ubm, scaledValues, scale);
+			}
+			else {
+				return makeBitmapLossyMultiCol(ubm, scaledValues, scale);
+			}
+		}
+
+	}
+
+	private static AbstractBitmap makeBitmapLossySingleCol(Bitmap ubm, byte[] scaledValues, double scale) {
+
+		Map<Byte, Queue<IntArrayList>> values = new HashMap<>();
+		IntArrayList[] fullSizeOffsetsLists = ubm.getOffsetList();
+		int numZeroGroups = ubm.getZeroCounts();
+		for(int idx = 0; idx < scaledValues.length; idx++) {
+			if(scaledValues[idx] != 0) { // Throw away zero values.
+				if(values.containsKey(scaledValues[idx])) {
+					values.get(scaledValues[idx]).add(fullSizeOffsetsLists[idx]);
+				}
+				else {
+					Queue<IntArrayList> offsets = new LinkedList<IntArrayList>();
+					offsets.add(fullSizeOffsetsLists[idx]);
+					values.put(scaledValues[idx], offsets);
+				}
+			}
+			else {
+				numZeroGroups++;
+			}
+		}
+		byte[] scaledValuesReduced = new byte[values.keySet().size()];
+		IntArrayList[] newOffsetsLists = new IntArrayList[values.keySet().size()];
+		Iterator<Entry<Byte, Queue<IntArrayList>>> x = values.entrySet().iterator();
+		int idx = 0;
+		while(x.hasNext()) {
+			Entry<Byte, Queue<IntArrayList>> ent = x.next();
+			scaledValuesReduced[idx] = ent.getKey().byteValue();
+			newOffsetsLists[idx] = mergeOffsets(ent.getValue());
+			idx++;
+		}
+		return new BitmapLossy(ubm.getNumColumns(), newOffsetsLists, numZeroGroups, scaledValuesReduced, scale);
+	}
+
+	private static AbstractBitmap makeBitmapLossyMultiCol(Bitmap ubm, byte[] scaledValues, double scale) {
+		int numColumns = ubm.getNumColumns();
+		Map<List<Byte>, Queue<IntArrayList>> values = new HashMap<>();
+		IntArrayList[] fullSizeOffsetsLists = ubm.getOffsetList();
+		int numZeroGroups = ubm.getZeroCounts();
+		boolean allZero = true;
+		for(int idx = 0; idx < scaledValues.length; idx += numColumns) {
+			List<Byte> array = new ArrayList<>();
+			for(int off = 0; off < numColumns; off++) {
+				allZero = scaledValues[idx + off] == 0 && allZero;
+				array.add(scaledValues[idx + off]);
+			}
+			
+			numZeroGroups += allZero ? 1 : 0;
+			if(!allZero) {
+				if(values.containsKey(array)) {
+					values.get(array).add(fullSizeOffsetsLists[idx / numColumns]);
+				}
+				else {
+					Queue<IntArrayList> offsets = new LinkedList<IntArrayList>();
+					offsets.add(fullSizeOffsetsLists[idx / numColumns]);
+					values.put(array, offsets);
+				}
+				// LOG.error(array);
+			}
+			allZero = true;
+		}
+		// LOG.error(array);
+		// LOG.error(values);
+
+
+		byte[] scaledValuesReduced = new byte[values.keySet().size() * numColumns];
+		IntArrayList[] newOffsetsLists = new IntArrayList[values.keySet().size()];
+		Iterator<Entry<List<Byte>, Queue<IntArrayList>>> x = values.entrySet().iterator();
+		int idx = 0;
+		while(x.hasNext()) {
+			Entry<List<Byte>, Queue<IntArrayList>> ent = x.next();
+			List<Byte> key = ent.getKey();
+			int row = idx * numColumns;
+			for(int off = 0; off < numColumns; off++) {
+				scaledValuesReduced[row + off] = key.get(off);
+			}
+			newOffsetsLists[idx] = mergeOffsets(ent.getValue());
+			idx++;
+		}
+		// LOG.error(Arrays.toString(scaledValuesReduced));
+		// try {
+		// 	Thread.sleep(1000);
+		// }
+		// catch(InterruptedException e) {
+		// 	// TODO Auto-generated catch block
+		// 	e.printStackTrace();
+		// }
+		return new BitmapLossy(ubm.getNumColumns(), newOffsetsLists, numZeroGroups, scaledValuesReduced, scale);
+	}
+
+	/**
+	 * Get the scale for the given double array.
+	 * 
+	 * @param fp A array of double values
+	 * @return a scale to scale to range [-127, 127]
+	 */
+	public static double getScale(double[] fp) {
+		DoubleSummaryStatistics stat = Arrays.stream(fp).summaryStatistics();
+		double max = Math.abs(Math.max(stat.getMax(), Math.abs(stat.getMin())));
+		double scale;
+		if(Double.isInfinite(max)) {
+			LOG.warn("Invalid Column, can't quantize Infinite value.");
+			return Double.NaN;
+		}
+		else if(max == 0) { // The column group is filled with 0.
+			scale = 1;
+		}
+		else {
+			scale = max / (double) (Byte.MAX_VALUE);
+		}
+		return scale;
+	}
+
+	/**
+	 * Get all values without unnecessary allocations and copies.
+	 * 
+	 * @return dictionary of value tuples
+	 */
+	public byte[] getValues() {
+		return _values;
+	}
+
+	/**
+	 * Obtain tuple of column values associated with index.
+	 * 
+	 * @param ix index of a particular distinct value
+	 * @return the tuple of column values associated with the specified index
+	 */
+	public byte[] getValues(int ix) {
+		return Arrays.copyOfRange(_values, ix * _numCols, (ix + 1) * _numCols);
+	}
+
+	public double getScale() {
+		return _scale;
+	}
+
+	/**
+	 * Obtain number of distinct values in the column.
+	 * 
+	 * @return number of distinct values in the column; this number is also the number of bitmaps, since there is one
+	 *         bitmap per value
+	 */
+	public int getNumValues() {
+		return _values.length / _numCols;
+	}
+
+	public IntArrayList getOffsetsList(int ix) {
+		return _offsetsLists[ix];
+	}
+
+	public long getNumOffsets() {
+		long ret = 0;
+		for(IntArrayList offlist : _offsetsLists)
+			ret += offlist.size();
+		return ret;
+	}
+
+	public int getNumOffsets(int ix) {
+		return _offsetsLists[ix].size();
+	}
+
+	@Override
+	public void sortValuesByFrequency() {
+		// TODO Auto-generated method stub
+		throw new NotImplementedException("Not Implemented Sorting of Lossy Bit Map");
+	}
+
+	@Override
+	public String toString() {
+		StringBuilder sb = new StringBuilder();
+		sb.append(super.toString());
+		sb.append("\nValues: " + Arrays.toString(_values));
+		sb.append("\ncolumns:" + _numCols);
+		sb.append("\nScale:  " + _scale);
+		sb.append("\nOffsets:" + Arrays.toString(_offsetsLists));
+		return sb.toString();
+	}
+
+	// UTIL FUNCTIONS
+
+	private static IntArrayList mergeOffsets(Queue<IntArrayList> offsets) {
+		if(offsets.size() == 1) {
+			return offsets.remove();
+		}
+		else {
+			IntArrayList h = offsets.remove();
+			IntArrayList t = offsets.remove();
+			IntArrayList n = mergeOffsets(h, t);
+			offsets.add(n);
+			return mergeOffsets(offsets);
+		}
+	}
+
+	private static IntArrayList mergeOffsets(IntArrayList h, IntArrayList t) {
+		int lhsSize = h.size(); // Size left
+		int rhsSize = t.size(); // Size right
+		int[] res = new int[lhsSize + rhsSize]; // Result array.
+		int[] lhs = h.extractValues(); // Left hand side values
+		int[] rhs = t.extractValues(); // Right hand side values
+		int lhsP = 0; // Left hand side pointer
+		int rhsP = 0; // Right hand side pointer
+		int p = 0; // Pointer in array.
+		while(lhsP < lhsSize || rhsP < rhsSize) {
+			if(lhsP < lhsSize && (rhsP == rhsSize || lhs[lhsP] < rhs[rhsP])) {
+				res[p++] = lhs[lhsP++];
+			}
+			else {
+				res[p++] = rhs[rhsP++];
+			}
+		}
+		return new IntArrayList(res);
+	}
+
+	@Override
+	public BitmapType getType() {
+		return BitmapType.Lossy;
+	}
+
+	/**
+	 * Utility method to scale all the values in the array to byte range
+	 * 
+	 * TODO make scaling parallel since each scaling is independent.
+	 * 
+	 * @param fp    doulbe array to scale
+	 * @param scale the scale to apply
+	 * @return the scaled values in byte
+	 */
+	public static byte[] scaleValues(double[] fp, double scale) {
+		byte[] res = new byte[fp.length];
+		for(int idx = 0; idx < fp.length; idx++) {
+			res[idx] = (byte) (fp[idx] / scale);
+		}
+		return res;
+	}
+}
diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/IntArrayList.java b/src/main/java/org/apache/sysds/runtime/compress/utils/IntArrayList.java
index a9b224d..25ee75b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/utils/IntArrayList.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/IntArrayList.java
@@ -29,8 +29,8 @@
 	private static final int RESIZE_FACTOR = 2;
 
 	private int[] _data = null;
-	private int _size = -1;
-	private int _val0 = -1;
+	private int _size;
+	private int _val0;
 
 	public IntArrayList() {
 		_data = null;
@@ -42,6 +42,11 @@
 		appendValue(value);
 	}
 
+	public IntArrayList(int[] values){
+		_data = values;
+		_size = values.length;
+	}
+
 	public int size() {
 		return _size;
 	}
@@ -94,4 +99,17 @@
 		// resize data array and copy existing contents
 		_data = Arrays.copyOf(_data, _data.length * RESIZE_FACTOR);
 	}
+
+	@Override
+	public String toString(){
+		StringBuilder sb = new StringBuilder();
+		sb.append("IntArrayList ");
+		sb.append("size: " + _size);
+		if(_size == 1){
+			sb.append(" [" + _val0+ "]");
+		} else{
+			sb.append(" " + Arrays.toString(_data));
+		}
+		return sb.toString();
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java b/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
index 8ff29ec..33aeae0 100644
--- a/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
+++ b/src/main/java/org/apache/sysds/runtime/functionobjects/Builtin.java
@@ -289,4 +289,9 @@
 			throw new DMLRuntimeException("Builtin.execute(): Unknown operation: " + bFunc);
 		}
 	}
+
+	@Override
+	public String toString(){
+		return "Builtin:" + bFunc;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java
index 82a604e..f7838ae 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java
@@ -437,7 +437,7 @@
 				return CovarianceCPInstruction.parseInstruction(str);
 
 			case Compression:
-				return (CPInstruction) CompressionCPInstruction.parseInstruction(str);
+				return CompressionCPInstruction.parseInstruction(str);
 			
 			case SpoofFused:
 				return SpoofCPInstruction.parseInstruction(str);
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/CompressionCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/CompressionCPInstruction.java
index 90ea352..56125d5 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/CompressionCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/CompressionCPInstruction.java
@@ -22,7 +22,6 @@
 import org.apache.sysds.hops.OptimizerUtils;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysds.runtime.instructions.Instruction;
 import org.apache.sysds.runtime.instructions.InstructionUtils;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.Operator;
@@ -33,7 +32,7 @@
 		super(CPType.Compression, op, in, null, null, out, opcode, istr);
 	}
 
-	public static Instruction parseInstruction(String str) {
+	public static CompressionCPInstruction parseInstruction(String str) {
 		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
 		String opcode = parts[0];
 		CPOperand in1 = new CPOperand(parts[1]);
@@ -46,7 +45,7 @@
 		// Get matrix block input
 		MatrixBlock in = ec.getMatrixInput(input1.getName());
 		// Compress the matrix block
-		MatrixBlock out = CompressedMatrixBlockFactory.compress(in, OptimizerUtils.getConstrainedNumThreads(-1));
+		MatrixBlock out = CompressedMatrixBlockFactory.compress(in, OptimizerUtils.getConstrainedNumThreads(-1)).getLeft();
 		// Set output and release input
 		ec.releaseMatrixInput(input1.getName());
 		ec.setMatrixOutput(output.getName(), out);
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/CompressionSPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/CompressionSPInstruction.java
index f27325a..f0ff849 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/spark/CompressionSPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/CompressionSPInstruction.java
@@ -62,7 +62,7 @@
 
 		@Override
 		public MatrixBlock call(MatrixBlock arg0) throws Exception {
-			return CompressedMatrixBlockFactory.compress(arg0);
+			return CompressedMatrixBlockFactory.compress(arg0).getLeft();
 		}
 	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixCountDistinct.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixCountDistinct.java
index 0ae1b92..c078e36 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixCountDistinct.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixCountDistinct.java
@@ -21,7 +21,7 @@
 
 import java.util.Collections;
 import java.util.HashSet;
-import java.util.Iterator;
+import java.util.List;
 import java.util.PriorityQueue;
 import java.util.Set;
 
@@ -30,6 +30,10 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.api.DMLException;
 import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.colgroup.ColGroup;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.matrix.operators.CountDistinctOperator;
 import org.apache.sysds.runtime.matrix.operators.CountDistinctOperator.CountDistinctTypes;
 import org.apache.sysds.utils.Hash;
@@ -75,9 +79,9 @@
 			throw new NotImplementedException("HyperLogLog not implemented");
 		}
 		// shortcut in simplest case.
-		if( in.getLength() == 1 || in.isEmpty() )
+		if(in.getLength() == 1 || in.isEmpty())
 			return 1;
-		else if( in.getNonZeros() < minimumSize ) {
+		else if(in.getNonZeros() < minimumSize) {
 			// Just use naive implementation if the number of nonZeros values size is small.
 			res = countDistinctValuesNaive(in);
 		}
@@ -93,9 +97,9 @@
 					throw new DMLException("Invalid or not implemented Estimator Type");
 			}
 		}
-		
+
 		if(res == 0)
-			throw new DMLRuntimeException("Imposible estimate of distinct values");
+			throw new DMLRuntimeException("Impossible estimate of distinct values");
 		return res;
 	}
 
@@ -109,30 +113,51 @@
 	 */
 	private static int countDistinctValuesNaive(MatrixBlock in) {
 		Set<Double> distinct = new HashSet<>();
-
-		// TODO performance: direct sparse block /dense block access
-		if(in.isInSparseFormat()) {
-			Iterator<IJV> it = in.getSparseBlockIterator();
-			while(it.hasNext()) {
-				distinct.add(it.next().getV());
+		double[] data;
+		long nonZeros = in.getNonZeros();
+		if(nonZeros < in.getNumColumns() * in.getNumRows()){
+			distinct.add(0d);
+		}
+		if(in.sparseBlock == null && in.denseBlock == null) {
+			List<ColGroup> colGroups = ((CompressedMatrixBlock) in).getColGroups();
+			for(ColGroup cg : colGroups) {
+				countDistinctValuesNaive(cg.getValues(), distinct);
 			}
-			if( in.getNonZeros() < in.getLength() )
-				distinct.add(0d);
+		}
+		else if(in.sparseBlock != null) {
+			SparseBlock sb = in.sparseBlock;
+			
+			if(in.sparseBlock.isContiguous()) {
+				data = sb.values(0);
+				countDistinctValuesNaive(data, distinct);
+			}
+			else {
+				for(int i = 0; i < in.getNumRows(); i++) {
+					if(!sb.isEmpty(i)) {
+						data = in.sparseBlock.values(i);
+						countDistinctValuesNaive(data, distinct);
+					}
+				}
+			}
 		}
 		else {
-			//TODO fix for large dense blocks, where this call will fail
-			double[] data = in.getDenseBlockValues();
-			if(data == null) {
-				throw new DMLRuntimeException("Not valid execution");
-			}
-			//TODO avoid redundantly adding zero if not entirly dense
-			for(double v : data) {
-				distinct.add(v);
+			DenseBlock db = in.denseBlock;
+			for(int i = 0; i <= db.numBlocks(); i++) {
+				data = db.valuesAt(i);
+				countDistinctValuesNaive(data, distinct);
 			}
 		}
+
 		return distinct.size();
 	}
 
+	private static Set<Double> countDistinctValuesNaive(double[] valuesPart, Set<Double> distinct) {
+		for(double v : valuesPart) {
+			distinct.add(v);
+		}
+		return distinct;
+	}
+
 	/**
 	 * KMV synopsis(for k minimum values) Distinct-Value Estimation
 	 * 
@@ -166,27 +191,7 @@
 		int k = D > 64 ? 64 : (int) D;
 		SmallestPriorityQueue spq = new SmallestPriorityQueue(k);
 
-		if(in.isInSparseFormat()) {
-			Iterator<IJV> it = in.getSparseBlockIterator();
-			while(it.hasNext()) {
-				double fullValue = it.next().getV();
-				int hash = Hash.hash(fullValue, op.hashType);
-				// Since Java does not have unsigned integer, the hash value is abs.
-				int v = (Math.abs(hash)) % (M - 1) + 1;
-				spq.add(v);
-			}
-			if( in.getNonZeros() < in.getLength() )
-				spq.add(Hash.hash(0d, op.hashType));
-		}
-		else {
-			//TODO fix for large dense blocks, where this call will fail
-			double[] data = in.getDenseBlockValues();
-			for(double fullValue : data) {
-				int hash = Hash.hash(fullValue, op.hashType);
-				int v = (Math.abs(hash)) % (M - 1) + 1;
-				spq.add(v);
-			}
-		}
+		countDistinctValuesKVM(in, op.hashType, k, spq, M);
 
 		LOG.debug("M: " + M);
 		LOG.debug("smallest hash:" + spq.peek());
@@ -201,11 +206,55 @@
 			double estimate = (double) (k - 1) / U_k;
 			LOG.debug("Estimate: " + estimate);
 			double ceilEstimate = Math.min(estimate, (double) D);
-			LOG.debug("Ceil worst case: " + ceilEstimate);
+			LOG.debug("Ceil worst case: " + D);
 			return (int) ceilEstimate;
 		}
 	}
 
+	private static void countDistinctValuesKVM(MatrixBlock in, HashType hashType, int k, SmallestPriorityQueue spq,
+		int m) {
+		double[] data;
+		if(in.sparseBlock == null && in.denseBlock == null) {
+			List<ColGroup> colGroups = ((CompressedMatrixBlock) in).getColGroups();
+			for(ColGroup cg : colGroups) {
+				countDistinctValuesKVM(cg.getValues(), hashType, k, spq, m);
+			}
+		}
+		else if(in.sparseBlock != null) {
+			SparseBlock sb = in.sparseBlock;
+			if(in.sparseBlock.isContiguous()) {
+				data = sb.values(0);
+				countDistinctValuesKVM(data, hashType, k, spq, m);
+			}
+			else {
+				for(int i = 0; i < in.getNumRows(); i++) {
+					if(!sb.isEmpty(i)) {
+						data = in.sparseBlock.values(i);
+						countDistinctValuesKVM(data, hashType, k, spq, m);
+					}
+				}
+			}
+		}
+		else {
+			DenseBlock db = in.denseBlock;
+			final int bil = db.index(0);
+			final int biu = db.index(in.rlen);
+			for(int i = bil; i <= biu; i++) {
+				data = db.valuesAt(i);
+				countDistinctValuesKVM(data, hashType, k, spq, m);
+			}
+		}
+	}
+
+	private static void countDistinctValuesKVM(double[] data, HashType hashType, int k, SmallestPriorityQueue spq,
+		int m) {
+		for(double fullValue : data) {
+			int hash = Hash.hash(fullValue, hashType);
+			int v = (Math.abs(hash)) % (m - 1) + 1;
+			spq.add(v);
+		}
+	}
+
 	/**
 	 * Deceiving name, but is used to contain the k smallest values inserted.
 	 * 
diff --git a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
index 086408f..fe72ebc 100644
--- a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
@@ -1349,6 +1349,13 @@
 			ret[i] = data[i];
 		return ret;
 	}
+
+	public static double[] toDouble(byte[] data) {
+		double[] ret = new double[data.length];
+		for(int i=0; i<data.length; i++)
+			ret[i] = data[i];
+		return ret;
+	}
 	
 	public static double[] toDouble(BitSet data, int len) {
 		double[] ret = new double[len];
diff --git a/src/test/java/org/apache/sysds/test/component/compress/AbstractCompressedUnaryTests.java b/src/test/java/org/apache/sysds/test/component/compress/AbstractCompressedUnaryTests.java
index 3f5d71b..3717be7 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/AbstractCompressedUnaryTests.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/AbstractCompressedUnaryTests.java
@@ -38,12 +38,12 @@
 public abstract class AbstractCompressedUnaryTests extends CompressedTestBase {
 
 	public AbstractCompressedUnaryTests(SparsityType sparType, ValueType valType, ValueRange valRange,
-		CompressionSettings compSettings, MatrixTypology matrixTypology) {
-		super(sparType, valType, valRange, compSettings, matrixTypology);
+		CompressionSettings compSettings, MatrixTypology matrixTypology, int parallelism) {
+		super(sparType, valType, valRange, compSettings, matrixTypology, parallelism);
 	}
 
 	enum AggType {
-		ROWSUMS, COLSUMS, SUM, ROWSUMSSQ, COLSUMSSQ, SUMSQ, ROWMAXS, COLMAXS, MAX, ROWMINS, COLMINS, MIN,
+		ROWSUMS, COLSUMS, SUM, ROWSUMSSQ, COLSUMSSQ, SUMSQ, ROWMAXS, COLMAXS, MAX, ROWMINS, COLMINS, MIN, MEAN
 	}
 
 	@Test
@@ -93,45 +93,55 @@
 
 	@Test
 	public void testUnaryOperator_ROWMINS() {
-		testUnaryOperators(AggType.MAX);
+		testUnaryOperators(AggType.ROWMINS);
 	}
 
 	@Test
 	public void testUnaryOperator_COLMINS() {
-		testUnaryOperators(AggType.MAX);
+		testUnaryOperators(AggType.COLMINS);
 	}
 
 	@Test
 	public void testUnaryOperator_MIN() {
-		testUnaryOperators(AggType.MAX);
+		testUnaryOperators(AggType.MIN);
 	}
 
-	protected AggregateUnaryOperator getUnaryOperator(AggType aggType, int k) {
+	@Test(expected = NotImplementedException.class)
+	public void testUnaryOperator_MEAN() {
+		// if Input was not compressed then just pass test
+		if(!(cmb instanceof CompressedMatrixBlock))
+			throw new NotImplementedException("Test Passed");
+		testUnaryOperators(AggType.MEAN);
+	}
+
+	protected AggregateUnaryOperator getUnaryOperator(AggType aggType, int threads) {
 		switch(aggType) {
 			case SUM:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uak+", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uak+", threads);
 			case ROWSUMS:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uark+", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uark+", threads);
 			case COLSUMS:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uack+", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uack+", threads);
 			case SUMSQ:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uasqk+", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uasqk+", threads);
 			case ROWSUMSSQ:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uarsqk+", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uarsqk+", threads);
 			case COLSUMSSQ:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uacsqk+", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uacsqk+", threads);
 			case MAX:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uamax", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uamax", threads);
 			case ROWMAXS:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uarmax", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uarmax", threads);
 			case COLMAXS:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uacmax", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uacmax", threads);
 			case MIN:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uamin", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uamin", threads);
 			case ROWMINS:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uarmin", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uarmin", threads);
 			case COLMINS:
-				return InstructionUtils.parseBasicAggregateUnaryOperator("uacmin", k);
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uacmin", threads);
+			case MEAN:
+				return InstructionUtils.parseBasicAggregateUnaryOperator("uamean", threads);
 			default:
 				throw new NotImplementedException("Not Supported Aggregate Unary operator in test");
 		}
@@ -165,8 +175,7 @@
 				if(aggType == AggType.COLSUMS) {
 					TestUtils.compareMatrices(d1, d2, lossyTolerance * 30 * dim2);
 				}
-				else 
-				if(aggType == AggType.ROWSUMS) {
+				else if(aggType == AggType.ROWSUMS) {
 					TestUtils.compareMatrices(d1, d2, lossyTolerance * 16 * dim1);
 				}
 				else {
@@ -183,6 +192,9 @@
 				TestUtils.compareMatricesBitAvgDistance(d1, d2, 2048, 20, compressionSettings.toString());
 			}
 		}
+		catch(NotImplementedException e) {
+			throw e;
+		}
 		catch(Exception e) {
 			e.printStackTrace();
 			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java
index 7acf790..ff09d45 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysds.test.component.compress;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
@@ -34,10 +35,11 @@
 import org.apache.sysds.runtime.compress.colgroup.ColGroup;
 import org.apache.sysds.runtime.functionobjects.Multiply;
 import org.apache.sysds.runtime.functionobjects.Plus;
+import org.apache.sysds.runtime.matrix.data.LibMatrixCountDistinct;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
-import org.apache.sysds.runtime.matrix.operators.AggregateBinaryOperator;
-import org.apache.sysds.runtime.matrix.operators.AggregateOperator;
 import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysds.runtime.matrix.operators.CountDistinctOperator;
+import org.apache.sysds.runtime.matrix.operators.CountDistinctOperator.CountDistinctTypes;
 import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 import org.apache.sysds.runtime.util.DataConverter;
@@ -46,6 +48,7 @@
 import org.apache.sysds.test.component.compress.TestConstants.SparsityType;
 import org.apache.sysds.test.component.compress.TestConstants.ValueRange;
 import org.apache.sysds.test.component.compress.TestConstants.ValueType;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -59,27 +62,7 @@
 
 	public CompressedMatrixTest(SparsityType sparType, ValueType valType, ValueRange valRange,
 		CompressionSettings compSettings, MatrixTypology matrixTypology) {
-		super(sparType, valType, valRange, compSettings, matrixTypology);
-	}
-
-	@Test
-	public void testConstruction() {
-		try {
-			if(!(cmb instanceof CompressedMatrixBlock)) {
-				return; // Input was not compressed then just pass test
-				// Assert.assertTrue("Compression Failed \n" + this.toString(), false);
-			}
-			if(compressionSettings.lossy) {
-				TestUtils.compareMatrices(input, deCompressed, lossyTolerance);
-			}
-			else {
-				TestUtils.compareMatricesBitAvgDistance(input, deCompressed, 0, 0, compressionSettings.toString());
-			}
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
-		}
+		super(sparType, valType, valRange, compSettings, matrixTypology, 1);
 	}
 
 	@Test
@@ -177,7 +160,7 @@
 					TestUtils.compareMatricesPercentageDistance(d1, d2, 0.95, 0.95, compressionSettings.toString());
 				}
 				else {
-					TestUtils.compareMatricesBitAvgDistance(d1, d2, 512, 32, compressionSettings.toString());
+					TestUtils.compareMatricesBitAvgDistance(d1, d2, 512, 350, compressionSettings.toString());
 				}
 			}
 		}
@@ -223,98 +206,6 @@
 	}
 
 	@Test
-	public void testMatrixVectorMult01() {
-		testMatrixVectorMult(1.0, 1.1);
-	}
-
-	@Test
-	public void testMatrixVectorMult02() {
-		testMatrixVectorMult(0.7, 1.0);
-	}
-
-	@Test
-	public void testMatrixVectorMult03() {
-		testMatrixVectorMult(-1.0, 1.0);
-	}
-
-	@Test
-	public void testMatrixVectorMult04() {
-		testMatrixVectorMult(1.0, 5.0);
-	}
-
-	public void testMatrixVectorMult(double min, double max) {
-		try {
-			if(!(cmb instanceof CompressedMatrixBlock))
-				return; // Input was not compressed then just pass test
-
-			MatrixBlock vector = DataConverter
-				.convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, min, max, 1.0, 3));
-
-			// Make Operator
-			AggregateOperator aop = new AggregateOperator(0, Plus.getPlusFnObject());
-			AggregateBinaryOperator abop = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), aop);
-
-			// matrix-vector uncompressed
-			MatrixBlock ret1 = mb.aggregateBinaryOperations(mb, vector, new MatrixBlock(), abop);
-
-			// matrix-vector compressed
-			MatrixBlock ret2 = cmb.aggregateBinaryOperations(cmb, vector, new MatrixBlock(), abop);
-
-			// compare result with input
-			double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
-			double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
-
-			if(compressionSettings.lossy) {
-				// TODO Make actual calculation to know the actual tolerance
-				double scaledTolerance = lossyTolerance * 30 * max;
-				TestUtils.compareMatrices(d1, d2, scaledTolerance);
-			}
-			else {
-				TestUtils.compareMatricesBitAvgDistance(d1, d2, 2048, 5, compressionSettings.toString());
-			}
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
-		}
-	}
-
-	@Test
-	public void testVectorMatrixMult() {
-		try {
-			if(!(cmb instanceof CompressedMatrixBlock))
-				return; // Input was not compressed then just pass test
-
-			MatrixBlock vector = DataConverter
-				.convertToMatrixBlock(TestUtils.generateTestMatrix(1, rows, 0.5, 1.5, 1.0, 3));
-
-			// Make Operator
-			AggregateOperator aop = new AggregateOperator(0, Plus.getPlusFnObject());
-			AggregateBinaryOperator abop = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), aop);
-
-			// vector-matrix uncompressed
-			MatrixBlock ret1 = mb.aggregateBinaryOperations(vector, mb, new MatrixBlock(), abop);
-
-			// vector-matrix compressed
-			MatrixBlock ret2 = cmb.aggregateBinaryOperations(vector, cmb, new MatrixBlock(), abop);
-
-			// compare result with input
-			double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
-			double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
-			if(compressionSettings.lossy) {
-				TestUtils.compareMatricesPercentageDistance(d1, d2, 0.60, 0.97, compressionSettings.toString());
-			}
-			else {
-				TestUtils.compareMatricesBitAvgDistance(d1, d2, 10000, 500, compressionSettings.toString());
-			}
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
-		}
-	}
-
-	@Test
 	public void testScalarOperationsSparseUnsafe() {
 		try {
 			if(!(cmb instanceof CompressedMatrixBlock))
@@ -381,6 +272,37 @@
 		}
 	}
 
+	@Test
+	public void testCountDistinct() {
+		try {
+			if(!(cmb instanceof CompressedMatrixBlock))
+				return; // Input was not compressed then just pass test
+			// compare result with input
+
+			// matrix-scalar uncompressed
+			CountDistinctOperator op = new CountDistinctOperator(CountDistinctTypes.COUNT);
+			int ret1 = LibMatrixCountDistinct.estimateDistinctValues(mb, op);
+			// matrix-scalar compressed
+			int ret2 = LibMatrixCountDistinct.estimateDistinctValues(cmb, op);
+
+			// assertTrue(compressionSettings.toString(), ret1 == ret2);
+			String base = compressionSettings.toString() + "\n";
+			if(compressionSettings.lossy) {
+				// The number of distinct values should be significantly lower in lossy mode.
+				assertTrue(base + "estimate is less than actual", ret1 >= ret2);
+				assertTrue(base + "estimate is greater than 0", 0 < ret2);
+			}
+			else {
+				assertEquals(base, ret1, ret2);
+			}
+
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
+		}
+	}
+
 	@Override
 	public void testUnaryOperators(AggType aggType) {
 		AggregateUnaryOperator auop = super.getUnaryOperator(aggType, 1);
@@ -428,7 +350,7 @@
 		try {
 			if(!(cmb instanceof CompressedMatrixBlock))
 				return;
-			CompressionStatistics cStat = ((CompressedMatrixBlock) cmb).getCompressionStatistics();
+			CompressionStatistics cStat = cmbStats;
 			assertTrue("Compression ration if compressed should be larger than 1", cStat.ratio > 1);
 		}
 		catch(Exception e) {
@@ -442,7 +364,7 @@
 		try {
 			if(!(cmb instanceof CompressedMatrixBlock))
 				return;
-			CompressionStatistics cStat = ((CompressedMatrixBlock) cmb).getCompressionStatistics();
+			CompressionStatistics cStat = cmbStats;
 			long colsEstimate = cStat.estimatedSizeCols;
 			long actualSize = cStat.size;
 			long originalSize = cStat.originalSize;
@@ -471,15 +393,16 @@
 		}
 	}
 
+	@Ignore
 	@Test
 	public void testCompressionEstimationVSJolEstimate() {
 		try {
 			if(!(cmb instanceof CompressedMatrixBlock))
 				return;
-			CompressionStatistics cStat = ((CompressedMatrixBlock) cmb).getCompressionStatistics();
+			CompressionStatistics cStat = cmbStats;
 			long actualSize = cStat.size;
 			long originalSize = cStat.originalSize;
-			long JolEstimatedSize = getJolSize(((CompressedMatrixBlock) cmb));
+			long JolEstimatedSize = getJolSize(((CompressedMatrixBlock) cmb), cmbStats);
 
 			StringBuilder builder = new StringBuilder();
 			builder.append("\n\t" + String.format("%-40s - %12d", "Actual compressed size: ", actualSize));
@@ -511,7 +434,7 @@
 			if(!(cmb instanceof CompressedMatrixBlock))
 				return;
 
-			CompressionStatistics cStat = ((CompressedMatrixBlock) cmb).getCompressionStatistics();
+			CompressionStatistics cStat = cmbStats;
 
 			double compressRatio = cStat.ratio;
 			long actualSize = cStat.size;
@@ -533,11 +456,10 @@
 		}
 	}
 
-	private static long getJolSize(CompressedMatrixBlock cmb) {
+	private static long getJolSize(CompressedMatrixBlock cmb, CompressionStatistics cStat) {
 		Layouter l = new HotSpotLayouter(new X86_64_DataModel());
 		long jolEstimate = 0;
-		CompressionStatistics cStat = cmb.getCompressionStatistics();
-		for(Object ob : new Object[] {cmb, cStat, cStat.getColGroups(), cStat.getTimeArrayList(), cmb.getColGroups()}) {
+		for(Object ob : new Object[] {cmb, cmb.getColGroups()}) {
 			jolEstimate += ClassLayout.parseInstance(ob, l).instanceSize();
 		}
 		for(ColGroup cg : cmb.getColGroups()) {
@@ -546,26 +468,4 @@
 		return jolEstimate;
 	}
 
-	@SuppressWarnings("unused")
-	private static String getJolSizeString(CompressedMatrixBlock cmb) {
-		StringBuilder builder = new StringBuilder();
-		Layouter l = new HotSpotLayouter(new X86_64_DataModel());
-		long diff;
-		long jolEstimate = 0;
-		CompressionStatistics cStat = cmb.getCompressionStatistics();
-		for(Object ob : new Object[] {cmb, cStat, cStat.getColGroups(), cStat.getTimeArrayList(), cmb.getColGroups()}) {
-			ClassLayout cl = ClassLayout.parseInstance(ob, l);
-			diff = cl.instanceSize();
-			jolEstimate += diff;
-			builder.append(cl.toPrintable());
-			builder.append("TOTAL MEM: " + jolEstimate + " diff " + diff + "\n");
-		}
-		for(ColGroup cg : cmb.getColGroups()) {
-			diff = cg.estimateInMemorySize();
-			jolEstimate += diff;
-			builder.append(cg.getCompType());
-			builder.append("TOTAL MEM: " + jolEstimate + " diff " + diff + "\n");
-		}
-		return builder.toString();
-	}
 }
diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
index 990b83b..57d60d4 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
@@ -23,104 +23,120 @@
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.List;
+import java.util.EnumSet;
 
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.lops.MapMultChain.ChainType;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory;
 import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.CompressionSettingsBuilder;
+import org.apache.sysds.runtime.compress.CompressionStatistics;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
+import org.apache.sysds.runtime.functionobjects.Multiply;
+import org.apache.sysds.runtime.functionobjects.Plus;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.matrix.operators.AggregateBinaryOperator;
+import org.apache.sysds.runtime.matrix.operators.AggregateOperator;
 import org.apache.sysds.runtime.util.DataConverter;
+import org.apache.sysds.test.TestUtils;
 import org.apache.sysds.test.component.compress.TestConstants.MatrixTypology;
 import org.apache.sysds.test.component.compress.TestConstants.SparsityType;
 import org.apache.sysds.test.component.compress.TestConstants.ValueRange;
 import org.apache.sysds.test.component.compress.TestConstants.ValueType;
+import org.junit.Test;
 import org.junit.runners.Parameterized.Parameters;
 
-public class CompressedTestBase extends TestBase {
-
+public abstract class CompressedTestBase extends TestBase {
+	protected static final Log LOG = LogFactory.getLog(CompressedTestBase.class.getName());
 	protected static SparsityType[] usedSparsityTypes = new SparsityType[] { // Sparsity 0.9, 0.1, 0.01 and 0.0
+		// SparsityType.FULL,
 		SparsityType.DENSE,
-		// SparsityType.SPARSE,
+		SparsityType.SPARSE, 
 		// SparsityType.ULTRA_SPARSE,
 		// SparsityType.EMPTY
 	};
+
 	protected static ValueType[] usedValueTypes = new ValueType[] {
-		ValueType.RAND, 
-		ValueType.CONST,
-		ValueType.RAND_ROUND, 
-		ValueType.OLE_COMPRESSIBLE, 
-		ValueType.RLE_COMPRESSIBLE,
+		// ValueType.RAND,
+		// ValueType.CONST,
+		ValueType.RAND_ROUND,
+		//  ValueType.OLE_COMPRESSIBLE,
+		// ValueType.RLE_COMPRESSIBLE,
 	};
 
 	protected static ValueRange[] usedValueRanges = new ValueRange[] {
-		// ValueRange.SMALL,
+		// ValueRange.SMALL, 
 		ValueRange.LARGE,
+		// ValueRange.BYTE
 	};
 
-	private static List<CompressionType> DDCOnly = new ArrayList<>();
-	private static List<CompressionType> OLEOnly = new ArrayList<>();
-	private static List<CompressionType> RLEOnly = new ArrayList<>();
-	private static List<CompressionType> QuanOnly = new ArrayList<>();
-
-	static {
-		DDCOnly.add(CompressionType.DDC);
-		OLEOnly.add(CompressionType.OLE);
-		RLEOnly.add(CompressionType.RLE);
-		QuanOnly.add(CompressionType.QUAN);
-	}
-
 	private static final int compressionSeed = 7;
 
 	protected static CompressionSettings[] usedCompressionSettings = new CompressionSettings[] {
 		// new CompressionSettingsBuilder().setSamplingRatio(0.1).setAllowSharedDDCDictionary(false)
-		// 	.setSeed(compressionSeed).setValidCompressions(DDCOnly).setInvestigateEstimate(true).create(),
-		// new CompressionSettingsBuilder().setSamplingRatio(0.1).setAllowSharedDDCDictionary(true)
-		// 	.setSeed(compressionSeed).setValidCompressions(DDCOnly).setInvestigateEstimate(true).create(),
-		// new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setValidCompressions(OLEOnly)
-		// 	.setInvestigateEstimate(true).create(),
-		// new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setValidCompressions(RLEOnly)
-		// 	.setInvestigateEstimate(true).create(),
-		new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
+		// .setSeed(compressionSeed).setValidCompressions(EnumSet.of(CompressionType.DDC)).setInvestigateEstimate(true).create(),
+		new CompressionSettingsBuilder().setSamplingRatio(0.1)//.setAllowSharedDDCDictionary(true)
+			.setSeed(compressionSeed).setValidCompressions(EnumSet.of(CompressionType.DDC)).setInvestigateEstimate(true)
 			.create(),
-		new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setValidCompressions(QuanOnly)
-			.setInvestigateEstimate(true).create()
-		};
+		new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed)
+			.setValidCompressions(EnumSet.of(CompressionType.OLE)).setInvestigateEstimate(true).create(),
+		new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed)
+			.setValidCompressions(EnumSet.of(CompressionType.RLE)).setInvestigateEstimate(true).create(),
+		new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setInvestigateEstimate(true)
+			.create(),
+		// new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
+		// .addValidCompression(CompressionType.QUAN).create(),
+		new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
+		.setAllowSharedDDCDictionary(false).setmaxStaticColGroupCoCode(1).create(),
+		new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
+		.setAllowSharedDDCDictionary(false).setmaxStaticColGroupCoCode(1).setLossy(true).create(),
+		// new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
+		// .setAllowSharedDDCDictionary(false).setmaxStaticColGroupCoCode(20).create(),
+		// new CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
+		// .setAllowSharedDDCDictionary(false).setmaxStaticColGroupCoCode(20).setLossy(true).create()
+	};
 
 	protected static MatrixTypology[] usedMatrixTypology = new MatrixTypology[] { // Selected Matrix Types
-		MatrixTypology.SMALL, MatrixTypology.FEW_COL,
+		// MatrixTypology.SMALL,
+		// MatrixTypology.FEW_COL,
 		// MatrixTypology.FEW_ROW,
 		MatrixTypology.LARGE,
 		// MatrixTypology.SINGLE_COL,
 		// MatrixTypology.SINGLE_ROW,
-		MatrixTypology.L_ROWS,
+		// MatrixTypology.L_ROWS,
 		// MatrixTypology.XL_ROWS,
 	};
 
 	// Compressed Block
 	protected MatrixBlock cmb;
+	protected CompressionStatistics cmbStats;
 
 	// Decompressed Result
 	protected MatrixBlock cmbDeCompressed;
 	protected double[][] deCompressed;
 
-	// Threads
-	protected int k = 1;
+	/** Method returning the number of threads used for the operation */
+	protected final int _k;
 
 	protected int sampleTolerance = 1024;
 
 	protected double lossyTolerance;
 
 	public CompressedTestBase(SparsityType sparType, ValueType valType, ValueRange valueRange,
-		CompressionSettings compSettings, MatrixTypology MatrixTypology) {
+		CompressionSettings compSettings, MatrixTypology MatrixTypology, int parallelism) {
 		super(sparType, valType, valueRange, compSettings, MatrixTypology);
+		_k = parallelism;
 
 		try {
 			if(compSettings.lossy)
 				setLossyTolerance(valueRange);
-			cmb = CompressedMatrixBlockFactory.compress(mb, k, compressionSettings);
-
+			Pair<MatrixBlock, CompressionStatistics> pair = CompressedMatrixBlockFactory
+				.compress(mb, _k, compressionSettings);
+			cmb = pair.getLeft();
+			cmbStats = pair.getRight();
 			if(cmb instanceof CompressedMatrixBlock) {
 				cmbDeCompressed = ((CompressedMatrixBlock) cmb).decompress();
 				if(cmbDeCompressed != null) {
@@ -161,7 +177,6 @@
 					for(CompressionSettings cs : usedCompressionSettings) {
 						for(MatrixTypology mt : usedMatrixTypology) {
 							tests.add(new Object[] {st, vt, vr, cs, mt});
-
 						}
 					}
 				}
@@ -170,4 +185,155 @@
 
 		return tests;
 	}
+
+	// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+	// %%%%%%%%%%%%%%%%% TESTS START! %%%%%%%%%%%%%%%%%
+	// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+	@Test
+	public void testConstruction() {
+		try {
+			if(!(cmb instanceof CompressedMatrixBlock)) {
+				return; // Input was not compressed then just pass test
+				// Assert.assertTrue("Compression Failed \n" + this.toString(), false);
+			}
+			if(compressionSettings.lossy) {
+				TestUtils.compareMatrices(input, deCompressed, lossyTolerance);
+			}
+			else {
+				TestUtils.compareMatricesBitAvgDistance(input, deCompressed, 0, 0, compressionSettings.toString());
+			}
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
+		}
+	}
+
+	@Test
+	public void testDecompress() {
+		try {
+			if(!(cmb instanceof CompressedMatrixBlock)) {
+				return; // Input was not compressed then just pass test
+				// Assert.assertTrue("Compression Failed \n" + this.toString(), false);
+			}
+			double[][] deCompressed = DataConverter.convertToDoubleMatrix(((CompressedMatrixBlock) cmb).decompress(_k));
+			if(compressionSettings.lossy) {
+				TestUtils.compareMatrices(input, deCompressed, lossyTolerance);
+			}
+			else {
+				TestUtils.compareMatricesBitAvgDistance(input, deCompressed, 0, 0, compressionSettings.toString());
+			}
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
+		}
+	}
+
+	@Test
+	public void testMatrixMultChain() {
+		try {
+			if(!(cmb instanceof CompressedMatrixBlock))
+				return; // Input was not compressed then just pass test
+
+			MatrixBlock vector1 = DataConverter
+				.convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, 0.5, 1.5, 1.0, 3));
+
+			// ChainType ctype = ChainType.XtwXv;
+			// Linear regression .
+			for(ChainType ctype : new ChainType[] {ChainType.XtwXv, ChainType.XtXv,
+				// ChainType.XtXvy
+			}) {
+
+				MatrixBlock vector2 = (ctype == ChainType.XtwXv) ? DataConverter
+					.convertToMatrixBlock(TestUtils.generateTestMatrix(rows, 1, 0.5, 1.5, 1.0, 3)) : null;
+
+				// matrix-vector uncompressed
+				MatrixBlock ret1 = mb.chainMatrixMultOperations(vector1, vector2, new MatrixBlock(), ctype, _k);
+
+				// matrix-vector compressed
+				MatrixBlock ret2 = cmb.chainMatrixMultOperations(vector1, vector2, new MatrixBlock(), ctype, _k);
+
+				// compare result with input
+				double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
+				double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
+
+				if(compressionSettings.lossy) {
+					// TODO Make actual calculation to know the tolerance
+					// double scaledTolerance = lossyTolerance * d1.length * d1.length * 1.5;
+					// if(ctype == ChainType.XtwXv){
+					// scaledTolerance *= d1.length * d1.length * 0.5;
+					// }
+					// TestUtils.compareMatrices(d1, d2, d1.length, d1[0].length, scaledTolerance );
+					TestUtils.compareMatricesPercentageDistance(d1, d2, 0.95, 0.95, compressionSettings.toString());
+				}
+				else {
+					TestUtils.compareMatricesBitAvgDistance(d1, d2, 2048, 350, compressionSettings.toString());
+				}
+			}
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
+		}
+	}
+
+	@Test
+	public void testMatrixVectorMult01() {
+		testMatrixVectorMult(1.0, 1.1);
+	}
+
+	@Test
+	public void testMatrixVectorMult02() {
+		testMatrixVectorMult(0.7, 1.0);
+	}
+
+	@Test
+	public void testMatrixVectorMult03() {
+		testMatrixVectorMult(-1.0, 1.0);
+	}
+
+	@Test
+	public void testMatrixVectorMult04() {
+		testMatrixVectorMult(1.0, 5.0);
+	}
+
+	public void testMatrixVectorMult(double min, double max) {
+		try {
+			if(!(cmb instanceof CompressedMatrixBlock))
+				return; // Input was not compressed then just pass test
+
+			MatrixBlock vector = DataConverter
+				.convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, min, max, 1.0, 3));
+
+			// Make Operator // matrix-vector uncompressed
+			// AggregateBinaryOperator abop = InstructionUtils.getMatMultOperator(_k);
+			AggregateOperator aop = new AggregateOperator(0, Plus.getPlusFnObject());
+			AggregateBinaryOperator abop = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), aop);
+
+			// matrix-vector uncompressed
+			MatrixBlock ret1 = mb.aggregateBinaryOperations(mb, vector, new MatrixBlock(), abop);
+
+			// matrix-vector compressed
+			MatrixBlock ret2 = cmb.aggregateBinaryOperations(cmb, vector, new MatrixBlock(), abop);
+
+			// compare result with input
+			double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
+			double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
+
+			if(compressionSettings.lossy) {
+				// TODO Make actual calculation to know the actual tolerance
+				double scaledTolerance = lossyTolerance * 30 * max;
+				TestUtils.compareMatrices(d1, d2, scaledTolerance);
+			}
+			else {
+				TestUtils.compareMatricesBitAvgDistance(d1, d2, 120000, 128, compressionSettings.toString());
+			}
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
+		}
+	}
 }
diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedVectorTest.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedVectorTest.java
index 2607b92..0f42ac4 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/CompressedVectorTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedVectorTest.java
@@ -33,6 +33,7 @@
 import org.apache.sysds.test.component.compress.TestConstants.SparsityType;
 import org.apache.sysds.test.component.compress.TestConstants.ValueRange;
 import org.apache.sysds.test.component.compress.TestConstants.ValueType;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -41,11 +42,17 @@
 @RunWith(value = Parameterized.class)
 public class CompressedVectorTest extends CompressedTestBase {
 
+	private final int _k = 1;
+
 	protected static MatrixTypology[] usedMatrixTypologyLocal = new MatrixTypology[] {// types
 		MatrixTypology.SINGLE_COL,
 		// MatrixTypology.SINGLE_COL_L
 	};
 
+	protected int getK(){
+		return _k;
+	}
+
 	@Parameters
 	public static Collection<Object[]> data() {
 		ArrayList<Object[]> tests = new ArrayList<>();
@@ -65,9 +72,11 @@
 
 	public CompressedVectorTest(SparsityType sparType, ValueType valType, ValueRange valRange,
 		CompressionSettings compSettings, MatrixTypology matrixTypology) {
-		super(sparType, valType, valRange, compSettings, matrixTypology);
+		super(sparType, valType, valRange, compSettings, matrixTypology, 1);
 	}
 
+
+	@Ignore
 	@Test
 	public void testCentralMoment() throws Exception {
 		// TODO: Make Central Moment Test work on Multi dimensional Matrix
@@ -96,6 +105,7 @@
 		}
 	}
 
+	@Ignore
 	@Test
 	public void testQuantile() {
 		try {
diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java b/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
index 54c9414..be78e2f 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
@@ -20,9 +20,7 @@
 package org.apache.sysds.test.component.compress;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
-import java.util.DoubleSummaryStatistics;
 import java.util.List;
 import java.util.Random;
 
@@ -32,29 +30,28 @@
 import org.apache.sysds.runtime.util.DataConverter;
 
 /**
- * WARNING, this compressible input generator generates transposed inputs, (rows
- * and cols are switched) this is because then the test does not need to
- * transpose the input for the colGroups that expect transposed inputs.
+ * WARNING, this compressible input generator generates transposed inputs, (rows and cols are switched) this is because
+ * then the test does not need to transpose the input for the colGroups that expect transposed inputs.
  * 
  */
 public class CompressibleInputGenerator {
 
-	public static MatrixBlock getInput(int rows, int cols, CompressionType ct, int nrUnique, 
-			double sparsity, int seed) {
+	public static MatrixBlock getInput(int rows, int cols, CompressionType ct, int nrUnique, double sparsity,
+		int seed) {
 		double[][] output = getInputDoubleMatrix(rows, cols, ct, nrUnique, 1000000, -1000000, sparsity, seed, false);
 		return DataConverter.convertToMatrixBlock(output);
 	}
 
 	public static MatrixBlock getInput(int rows, int cols, CompressionType ct, int nrUnique, int max, int min,
-			double sparsity, int seed) {
+		double sparsity, int seed) {
 		double[][] output = getInputDoubleMatrix(rows, cols, ct, nrUnique, max, min, sparsity, seed, false);
 		return DataConverter.convertToMatrixBlock(output);
 	}
 
 	public static double[][] getInputDoubleMatrix(int rows, int cols, CompressionType ct, int nrUnique, int max,
-			int min, double sparsity, int seed, boolean transpose) {
+		int min, double sparsity, int seed, boolean transpose) {
 		double[][] output;
-		switch (ct) {
+		switch(ct) {
 			case RLE:
 				output = rle(rows, cols, nrUnique, max, min, sparsity, seed, transpose);
 				break;
@@ -64,56 +61,48 @@
 			default:
 				throw new NotImplementedException("Not implemented generator.");
 		}
-		for(double[] x : output){
-			DoubleSummaryStatistics dss =  Arrays.stream(x).summaryStatistics();
-			if(dss.getMax() > max) {
-				throw new RuntimeException("Incorrect matrix generated "+ct+", max to high was: " + dss.getMax() + " should be :" + max);
-			}
-			if(dss.getMin() < min) {
-				throw new RuntimeException("Incorrect matrix generated "+ct+", min to low was: " + dss.getMin()  + " should be :" + min);
-			}
-		}
+
 		return output;
 	}
 
 	private static double[][] rle(int rows, int cols, int nrUnique, int max, int min, double sparsity, int seed,
-			boolean transpose) {
+		boolean transpose) {
 
 		Random r = new Random(seed);
 		List<Double> values = getNRandomValues(nrUnique, r, max, min);
 
 		double[][] matrix = transpose ? new double[rows][cols] : new double[cols][rows];
 
-		for (int colNr = 0; colNr < cols; colNr++) {
+		for(int colNr = 0; colNr < cols; colNr++) {
 			Collections.shuffle(values, r);
 
 			// Generate a Dirichlet distribution, to distribute the values
 			int[] occurences = makeDirichletDistribution(nrUnique, rows, r);
 
-			// double[] col = new double[rows];
-
 			int pointer = 0;
 			int valuePointer = 0;
-			for (int nr : occurences) {
+			for(int nr : occurences) {
 				int zeros = (int) (Math.floor(nr * (1.0 - sparsity)));
 				int before = (zeros > 0) ? r.nextInt(zeros) : 0;
 				int after = zeros - before;
 				pointer += before;
-				for (int i = before; i < nr - after; i++) {
-					if (transpose) {
+				for(int i = before; i < nr - after; i++) {
+					if(transpose) {
 						matrix[pointer][colNr] = values.get(valuePointer);
-					} else {
+					}
+					else {
 						matrix[colNr][pointer] = values.get(valuePointer);
 					}
 					pointer++;
 				}
 				pointer += after;
 				valuePointer++;
-				if (valuePointer == values.size() && after == 0) {
-					while (pointer < rows) {
-						if (transpose) {
+				if(valuePointer == values.size() && after == 0) {
+					while(pointer < rows) {
+						if(transpose) {
 							matrix[pointer][colNr] = values.get(nrUnique - 1);
-						} else {
+						}
+						else {
 							matrix[colNr][pointer] = values.get(nrUnique - 1);
 						}
 						pointer++;
@@ -125,52 +114,55 @@
 	}
 
 	/**
-	 * Note ole compress the best if there are multiple correlated columns.
-	 * Therefore the multiple columns are needed for good compressions. Also Nr
-	 * Unique is only associated to a specific column in this compression, so the
-	 * number of uniques are only in a single column, making actual the nrUnique
-	 * (cols * nrUnique) Does not guaranty that all the nr uniques are in use, since
-	 * the values are randomly selected.
+	 * Note ole compress the best if there are multiple correlated columns. Therefore the multiple columns are needed
+	 * for good compressions. Also Nr Unique is only associated to a specific column in this compression, so the number
+	 * of uniques are only in a single column, making actual the nrUnique (cols * nrUnique) Does not guaranty that all
+	 * the nr uniques are in use, since the values are randomly selected.
 	 * 
 	 * @param rows      Number of rows in generated output
 	 * @param cols      Number of cols in generated output
-	 * @param nrUnique  Number of unique values in generated output, Note this means
-	 *                  base unique in this case. and this number will grow
-	 *                  according to sparsity as well.
+	 * @param nrUnique  Number of unique values in generated output, Note this means base unique in this case. and this
+	 *                  number will grow according to sparsity as well.
 	 * @param max       The Maximum Value contained
 	 * @param min       The Minimum value contained
-	 * @param sparsity  The sparsity of the generated matrix
+	 * @param sparsity  The sparsity of the generated matrix (only applicable to the first column)
 	 * @param seed      The seed of the generated matrix
 	 * @param transpose If the output should be a transposed matrix or not
 	 * @return Generated nicely compressible OLE col Group.
 	 */
 	private static double[][] ole(int rows, int cols, int nrUnique, int max, int min, double sparsity, int seed,
-			boolean transpose) {
+		boolean transpose) {
 		// chose some random values
 		Random r = new Random(seed);
 		List<Double> values = getNRandomValues(nrUnique, r, max, min);
 		double[][] matrix = transpose ? new double[rows][cols] : new double[cols][rows];
 
 		// Generate the first column.
-		for (int x = 0; x < rows; x++) {
-			if (r.nextDouble() < sparsity) {
-				if (transpose) {
+		for(int x = 0; x < rows; x++) {
+			if(r.nextDouble() < sparsity) {
+				if(transpose) {
 					matrix[x][0] = values.get(r.nextInt(nrUnique));
-				} else {
+				}
+				else {
 					matrix[0][x] = values.get(r.nextInt(nrUnique));
 				}
 			}
 		}
 
-		for (int y = 1; y < cols; y++) {
-			for (int x = 0; x < rows; x++) {
-				if (r.nextDouble() < sparsity) {
-					if (transpose) {
-						matrix[x][y] = matrix[x][0];
-					} else {
-						matrix[y][x] = matrix[0][x];
+		for(int y = 1; y < cols; y++) {
+			for(int x = 0; x < rows; x++) {
+				// if(r.nextDouble() < sparsity) {
+				if(transpose) {
+					if(matrix[x][0] != 0) {
+						matrix[x][y] = (matrix[x][0] * y + y) % (max - min) + min;
 					}
 				}
+				else {
+					if(matrix[0][x] != 0) {
+						matrix[y][x] = (matrix[0][x] * y + y) % (max - min) + min;
+					}
+				}
+				// }
 			}
 		}
 		return matrix;
@@ -179,13 +171,13 @@
 	private static int[] makeDirichletDistribution(int nrUnique, int rows, Random r) {
 		double[] distribution = new double[nrUnique];
 		double sum = 0;
-		for (int i = 0; i < nrUnique; i++) {
+		for(int i = 0; i < nrUnique; i++) {
 			distribution[i] = r.nextDouble();
 			sum += distribution[i];
 		}
 
 		int[] occurences = new int[nrUnique];
-		for (int i = 0; i < nrUnique; i++) {
+		for(int i = 0; i < nrUnique; i++) {
 			occurences[i] = (int) (((double) distribution[i] / (double) sum) * (double) rows);
 		}
 		return occurences;
@@ -193,9 +185,9 @@
 
 	private static List<Double> getNRandomValues(int nrUnique, Random r, int max, int min) {
 		List<Double> values = new ArrayList<>();
-		for (int i = 0; i < nrUnique; i++) {
-			double v = (r.nextDouble() * (double)(max - min)) + (double)min;
-			values.add( Math.floor(v));
+		for(int i = 0; i < nrUnique; i++) {
+			double v = (r.nextDouble() * (double) (max - min)) + (double) min;
+			values.add(Math.floor(v));
 		}
 		return values;
 	}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/ParCompressedMatrixTest.java b/src/test/java/org/apache/sysds/test/component/compress/ParCompressedMatrixTest.java
index 8ed8f01..23fd604 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/ParCompressedMatrixTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/ParCompressedMatrixTest.java
@@ -20,7 +20,6 @@
 package org.apache.sysds.test.component.compress;
 
 import org.apache.sysds.lops.MMTSJ.MMTSJType;
-import org.apache.sysds.lops.MapMultChain.ChainType;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
@@ -41,32 +40,10 @@
 @RunWith(value = Parameterized.class)
 public class ParCompressedMatrixTest extends AbstractCompressedUnaryTests {
 
-	private int k = InfrastructureAnalyzer.getLocalParallelism();
 
 	public ParCompressedMatrixTest(SparsityType sparType, ValueType valType, ValueRange valRange,
 		CompressionSettings compressionSettings, MatrixTypology matrixTypology) {
-		super(sparType, valType, valRange, compressionSettings, matrixTypology);
-	}
-
-	@Test
-	public void testConstruction() {
-		try {
-			if(!(cmb instanceof CompressedMatrixBlock)) {
-				// TODO Compress EVERYTHING!
-				return; // Input was not compressed then just pass test
-				// Assert.assertTrue("Compression Failed \n" + this.toString(), false);
-			}
-			if(compressionSettings.lossy) {
-				TestUtils.compareMatrices(input, deCompressed, lossyTolerance);
-			}
-			else {
-				TestUtils.compareMatricesBitAvgDistance(input, deCompressed, rows, cols, 0, 0);
-			}
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
-		}
+		super(sparType, valType, valRange, compressionSettings, matrixTypology, InfrastructureAnalyzer.getLocalParallelism());
 	}
 
 	@Test
@@ -95,46 +72,6 @@
 	}
 
 	@Test
-	public void testMatrixMultChain() {
-		try {
-			if(!(cmb instanceof CompressedMatrixBlock))
-				return; // Input was not compressed then just pass test
-
-			MatrixBlock vector1 = DataConverter
-				.convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, 0.5, 1.5, 1.0, 3));
-
-			// ChainType ctype = ChainType.XtwXv;
-			for(ChainType ctype : new ChainType[] {ChainType.XtwXv, ChainType.XtXv,
-				// ChainType.XtXvy
-			}) {
-
-				MatrixBlock vector2 = (ctype == ChainType.XtwXv) ? DataConverter
-					.convertToMatrixBlock(TestUtils.generateTestMatrix(rows, 1, 0.5, 1.5, 1.0, 3)) : null;
-
-				// matrix-vector uncompressed
-				MatrixBlock ret1 = mb.chainMatrixMultOperations(vector1, vector2, new MatrixBlock(), ctype, k);
-
-				// matrix-vector compressed
-				MatrixBlock ret2 = cmb.chainMatrixMultOperations(vector1, vector2, new MatrixBlock(), ctype, k);
-
-				// compare result with input
-				double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
-				double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
-				if(compressionSettings.lossy) {
-					TestUtils.compareMatricesPercentageDistance(d1, d2, 0.92, 0.95, compressionSettings.toString());
-				}
-				else {
-					TestUtils.compareMatricesBitAvgDistance(d1, d2, 2048, 32, compressionSettings.toString());
-				}
-			}
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
-		}
-	}
-
-	@Test
 	public void testTransposeSelfMatrixMult() {
 		try {
 			if(!(cmb instanceof CompressedMatrixBlock))
@@ -144,10 +81,10 @@
 				// MMTSJType.RIGHT
 			}) {
 				// matrix-vector uncompressed
-				MatrixBlock ret1 = mb.transposeSelfMatrixMultOperations(new MatrixBlock(), mType, k);
+				MatrixBlock ret1 = mb.transposeSelfMatrixMultOperations(new MatrixBlock(), mType, _k);
 
 				// matrix-vector compressed
-				MatrixBlock ret2 = cmb.transposeSelfMatrixMultOperations(new MatrixBlock(), mType, k);
+				MatrixBlock ret2 = cmb.transposeSelfMatrixMultOperations(new MatrixBlock(), mType, _k);
 
 				// compare result with input
 				double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
@@ -171,54 +108,6 @@
 	}
 
 	@Test
-	public void testMatrixVectorMult02() {
-		testMatrixVectorMult(0.7, 1.0);
-	}
-
-	@Test
-	public void testMatrixVectorMult03() {
-		testMatrixVectorMult(-1.0, 1.0);
-	}
-
-	@Test
-	public void testMatrixVectorMult04() {
-		testMatrixVectorMult(1.0, 5.0);
-	}
-
-	public void testMatrixVectorMult(double min, double max) {
-		try {
-			if(!(cmb instanceof CompressedMatrixBlock))
-				return; // Input was not compressed then just pass test
-
-			MatrixBlock vector = DataConverter
-				.convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, min, max, 1.0, 3));
-
-			// matrix-vector uncompressed
-			AggregateBinaryOperator abop = InstructionUtils.getMatMultOperator(k);
-			MatrixBlock ret1 = mb.aggregateBinaryOperations(mb, vector, new MatrixBlock(), abop);
-
-			// matrix-vector compressed
-			MatrixBlock ret2 = cmb.aggregateBinaryOperations(cmb, vector, new MatrixBlock(), abop);
-
-			// compare result with input
-			double[][] d1 = DataConverter.convertToDoubleMatrix(ret1);
-			double[][] d2 = DataConverter.convertToDoubleMatrix(ret2);
-			if(compressionSettings.lossy) {
-				// TODO Make actual calculation to know the actual tolerance
-				double scaledTolerance = lossyTolerance * 30 * max;
-				TestUtils.compareMatrices(d1, d2, scaledTolerance);
-			}
-			else {
-				TestUtils.compareMatricesBitAvgDistance(d1, d2, 2048, 5, compressionSettings.toString());
-			}
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			throw new RuntimeException(this.toString() + "\n" + e.getMessage(), e);
-		}
-	}
-
-	@Test
 	public void testVectorMatrixMult() {
 		try {
 			if(!(cmb instanceof CompressedMatrixBlock))
@@ -228,7 +117,7 @@
 				.convertToMatrixBlock(TestUtils.generateTestMatrix(1, rows, 1, 1, 1.0, 3));
 
 			// Make Operator
-			AggregateBinaryOperator abop = InstructionUtils.getMatMultOperator(k);
+			AggregateBinaryOperator abop = InstructionUtils.getMatMultOperator(_k);
 
 			// vector-matrix uncompressed
 			MatrixBlock ret1 = mb.aggregateBinaryOperations(vector, mb, new MatrixBlock(), abop);
@@ -254,7 +143,7 @@
 
 	@Override
 	public void testUnaryOperators(AggType aggType) {
-		AggregateUnaryOperator auop = super.getUnaryOperator(aggType, k);
+		AggregateUnaryOperator auop = super.getUnaryOperator(aggType, _k);
 		testUnaryOperators(aggType, auop);
 	}
 
diff --git a/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java b/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java
index 440b60b..83c2f37 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java
@@ -26,13 +26,13 @@
 
 	private static final int rows[] = {4, 2008, 1283, 5, 1, 251, 5000, 100000, 3123};
 	private static final int cols[] = {20, 20, 13, 998, 321, 1, 8, 10, 1};
-	private static final double[] sparsityValues = {0.9, 0.1, 0.01, 0.0};
+	private static final double[] sparsityValues = {0.9, 0.1, 0.01, 0.0, 1.0};
 
 	private static final int[] mins = {-10, -2147};
 	private static final int[] maxs = {10, 2147};
 
 	public enum SparsityType {
-		DENSE, SPARSE, ULTRA_SPARSE, EMPTY,
+		DENSE, SPARSE, ULTRA_SPARSE, EMPTY, FULL
 	}
 
 	public enum ValueType {
@@ -57,7 +57,8 @@
 
 	public enum ValueRange {
 		SMALL, 
-		LARGE
+		LARGE,
+		BYTE
 	}
 
 
@@ -71,6 +72,8 @@
 				return sparsityValues[2];
 			case EMPTY:
 				return sparsityValues[3];
+			case FULL:
+				return sparsityValues[4];
 			default:
 				throw new RuntimeException("Invalid Sparsity type"); 
 		}
@@ -82,6 +85,8 @@
 				return mins[0];
 			case LARGE:
 				return mins[1];
+			case BYTE:
+				return -127;
 			default:
 			throw new RuntimeException("Invalid range value enum type"); 
 		}
@@ -93,6 +98,8 @@
 				return maxs[0];
 			case LARGE:
 				return maxs[1];
+			case BYTE:
+				return 127;
 			default:
 				throw new RuntimeException("Invalid range value enum type"); 
 		}
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDDCTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDDCTest.java
index 4cc5b8c..e36da12 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDDCTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateDDCTest.java
@@ -31,7 +31,7 @@
 import org.junit.runners.Parameterized.Parameters;
 
 @RunWith(value = Parameterized.class)
-public class JolEstimateDDCTest extends JolEstimateTest{
+public class JolEstimateDDCTest extends JolEstimateTest {
 
 	@Parameters
 	public static Collection<Object[]> data() {
@@ -45,67 +45,66 @@
 		// that also encode 0 values the same as all the other values.
 
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{0}});
-		tests.add(new Object[] {mb, new int[]{1}, 8});
+		tests.add(new Object[] {mb, 8});
 
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1}});
-		tests.add(new Object[] {mb, new int[]{1}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2}});
-		tests.add(new Object[] {mb, new int[]{2}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3}});
-		tests.add(new Object[] {mb, new int[]{3}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4}});
-		tests.add(new Object[] {mb, new int[]{4}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4, 5}});
-		tests.add(new Object[] {mb, new int[]{5}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1, 2, 3, 4, 5, 6}});
-		tests.add(new Object[] {mb, new int[]{6}, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Dense Random
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 20, 0, 20, 1.0, 7));
-		tests.add(new Object[] {mb, new int[]{20}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 100, 0, 20, 1.0, 7));
-		tests.add(new Object[] {mb, new int[]{100}, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 500, 0, 20, 1.0, 7));
-		tests.add(new Object[] {mb, new int[]{500}, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Random Sparse Very big, because 0 is materialized.
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 254, 0.01, 7)));
-		tests.add(new Object[] {mb, new int[]{45}, 8});
+		tests.add(new Object[] {mb, 16});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 8000, 0, 254, 0.01, 7)));
-		tests.add(new Object[] {mb, new int[]{73}, 8});
+		tests.add(new Object[] {mb, 8});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 16000, 0, 254, 0.01, 7)));
-		tests.add(new Object[] {mb, new int[]{120}, 8});
+		tests.add(new Object[] {mb, 8});
 
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 254, 0.001, 7)));
-		tests.add(new Object[] {mb, new int[]{6}, 8});
+		tests.add(new Object[] {mb, 8});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 8000, 0, 254, 0.001, 7)));
-		tests.add(new Object[] {mb, new int[]{7}, 8});
+		tests.add(new Object[] {mb, 8});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 16000, 0, 254, 0.001, 7)));
-		tests.add(new Object[] {mb, new int[]{17}, 8});
+		tests.add(new Object[] {mb, 8});
 
 		// DDC2 instances, need more unique values than 255
 
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 512, 0.7, 7)));
-		tests.add(new Object[] {mb, new int[]{511}, 8});
+		tests.add(new Object[] {mb, 8});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 8000, 0, 1024, 0.7, 7)));
-		tests.add(new Object[] {mb, new int[]{1020}, 8});
+		tests.add(new Object[] {mb, 8});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 16000, 0, 2048, 0.7, 7)));
-		tests.add(new Object[] {mb, new int[]{2039}, 8});
-		
+		tests.add(new Object[] {mb, 8});
+
 		return tests;
 	}
 
-
-	public JolEstimateDDCTest(MatrixBlock mb, int[] sizes, int tolerance) {
-		super(mb,sizes,tolerance);
+	public JolEstimateDDCTest(MatrixBlock mb, int tolerance) {
+		super(mb, tolerance);
 	}
 
 	@Override
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
index 97daf72..4d20eef 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
@@ -41,71 +41,71 @@
 		MatrixBlock mb;
 		// base tests
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 1 } });
-		tests.add(new Object[] { mb, new int[] { 1, 2, 2, 1 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0 } });
-		tests.add(new Object[] { mb, new int[] { 0, 1, 0, 0 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 0 } });
-		tests.add(new Object[] { mb, new int[] { 0, 1, 0, 0 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		// The size of the compression increase at repeated values.
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 0 } });
-		tests.add(new Object[] { mb, new int[] { 1, 2, 2, 1 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 5, 0 } });
-		tests.add(new Object[] { mb, new int[] { 1, 2, 3, 1 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 5, 5, 0 } });
-		tests.add(new Object[] { mb, new int[] { 1, 2, 4, 1 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 5, 5, 5, 5, 5 } });
-		tests.add(new Object[] { mb, new int[] { 1, 2, 7, 1 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		// all values grow by 1 if new value is introduced
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 7, 0 } });
-		tests.add(new Object[] { mb, new int[] { 2, 3, 4, 2 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 2, 1, 0 } });
-		tests.add(new Object[] { mb, new int[] { 3, 4, 6, 3 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(new double[][] { { 0, 0, 0, 0, 5, 2, 1, 3, 6, 7 } });
-		tests.add(new Object[] { mb, new int[] { 6, 7, 12, 6 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
-		// Dense random... Horrible compression
+		// Dense random... Horrible compression at full precision
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 100, 0, 100, 1.0, 7));
-		tests.add(new Object[] { mb, new int[] { 100, 100 + 1, 200, 100 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 1000, 0, 100, 1.0, 7));
-		tests.add(new Object[] { mb, new int[] { 1000, 1000 + 1, 2000, 1000 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 10000, 0, 100, 1.0, 7));
-		tests.add(new Object[] { mb, new int[] { 10000, 10000 + 1, 20000, 10000 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		// Random rounded numbers dense
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 1.0, 7)));
-		tests.add(new Object[] { mb, new int[] { 99, 100, 1616, 99 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 1.0, 7)));
-		tests.add(new Object[] { mb, new int[] { 255, 256, 4250, 255 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		// Sparse rounded numbers
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 0.1, 7)));
-		tests.add(new Object[] { mb, new int[] { 76, 77, 225, 76 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter
 				.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1621, 0, 99, 0.1, 142)));
-		tests.add(new Object[] { mb, new int[] { 81, 82, 238, 81 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter
 				.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 99, 0.1, 512)));
-		tests.add(new Object[] { mb, new int[] { 92, 93, 332, 92 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 0.1, 7)));
-		tests.add(new Object[] { mb, new int[] { 195, 196, 573, 195 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 0.5, 7)));
-		tests.add(new Object[] { mb, new int[] { 98, 99, 826, 99 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter
 				.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1621, 0, 99, 0.5, 142)));
-		tests.add(new Object[] { mb, new int[] { 99, 100, 913, 99 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter
 				.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 99, 0.5, 512)));
-		tests.add(new Object[] { mb, new int[] { 99, 100, 1292, 99 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 0.5, 7)));
-		tests.add(new Object[] { mb, new int[] { 255, 256, 2208, 255 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		// Paper
 		mb = DataConverter.convertToMatrixBlock(
 				new double[][] { { 7, 3, 7, 7, 3, 7, 3, 3, 7, 3 }, { 6, 4, 6, 5, 4, 5, 4, 4, 6, 4 } });
-		tests.add(new Object[] { mb, new int[] { 6, 4, 13, 3 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		// Dream Inputs
 		int[] cols = new int[] { 2, 6, 111 };
@@ -115,20 +115,20 @@
 			for (int x : rows) {
 				for (int u : unique) {
 					mb = CompressibleInputGenerator.getInput(x, y, CompressionType.OLE, u, 1.0, 5);
-					tests.add(new Object[] { mb, new int[] { u * y, u + 1, x + u, u }, 0 });
+					tests.add(new Object[] { mb,  0 });
 				}
 			}
 		}
 
 		// Sparse test.
 		mb = CompressibleInputGenerator.getInput(571, 1, CompressionType.OLE, 40, 0.6, 5);
-		tests.add(new Object[] { mb, new int[] { 40 * 1, 40 + 1, ((571 + 40) / 10) * 6, 40 }, 0 });
+		tests.add(new Object[] { mb,  0 });
 
 		return tests;
 	}
 
-	public JolEstimateOLETest(MatrixBlock mb, int[] sizes, int tolerance) {
-		super(mb, sizes, tolerance);
+	public JolEstimateOLETest(MatrixBlock mb, int tolerance) {
+		super(mb, tolerance);
 	}
 
 	@Override
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateRLETest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateRLETest.java
index afab30f..26211d0 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateRLETest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateRLETest.java
@@ -38,131 +38,123 @@
 	public static Collection<Object[]> data() {
 		ArrayList<Object[]> tests = new ArrayList<>();
 
-		// dataListSize is dependent on the sparsity and the number of rows originally.
-		// The (- numRows) in the end is the actual number of runs in the compressed representation, and it is here we
-		// get the
-		// compressed sizes from
-		// dataListSize = (nrRows * 2 ) * sparsity - numRuns
-
-		// The actual sizes are are within a range of these estimates, therefore we have a tolerance set on these tests.
-
 		MatrixBlock mb;
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{1}});
-		tests.add(new Object[] {mb, 1, 2, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// The size of the compression is the same even at different numbers of repeated values.
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{0, 0, 0, 0, 5, 0}});
-		tests.add(new Object[] {mb, 1, 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{0, 0, 0, 0, 5, 5, 0}});
-		tests.add(new Object[] {mb, 1, 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{0, 0, 0, 0, 5, 5, 5, 0}});
-		tests.add(new Object[] {mb, 1, 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(new double[][] {{0, 0, 0, 0, 5, 5, 5, 5, 5, 5}});
-		tests.add(new Object[] {mb, 1, 2, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Worst case all random numbers dense.
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 100, 0, 100, 1.0, 7));
-		tests.add(new Object[] {mb, 100, 200, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 1000, 0, 100, 1.0, 7));
-		tests.add(new Object[] {mb, 1000, 2000, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 10000, 0, 100, 1.0, 7));
-		tests.add(new Object[] {mb, 10000, 20000, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Random rounded numbers dense
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 1.0, 7)));
-		tests.add(new Object[] {mb, 99, 3006, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 1.0, 7)));
-		tests.add(new Object[] {mb, 255, 7966, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Sparse rounded numbers
 		// Scale directly with sparsity
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 0.1, 7)));
-		tests.add(new Object[] {mb, 76, 298, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1621, 0, 99, 0.1, 142)));
-		tests.add(new Object[] {mb, 81, 314, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 99, 0.1, 512)));
-		tests.add(new Object[] {mb, 92, 480, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 0.1, 7)));
-		tests.add(new Object[] {mb, 195, 756, 250});
+		tests.add(new Object[] {mb, 250});
 
 		// Medium sparsity
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 0.5, 7)));
-		tests.add(new Object[] {mb, 98, 1446, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1621, 0, 99, 0.5, 142)));
-		tests.add(new Object[] {mb, 99, 1620, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter
 			.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 99, 0.5, 512)));
-		tests.add(new Object[] {mb, 99, 2366, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 0.5, 7)));
-		tests.add(new Object[] {mb, 255, 3900, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Dream inputs.
 		// 1 unique value
 		mb = CompressibleInputGenerator.getInput(10000, 1, CompressionType.RLE, 1, 1.0, 132);
-		tests.add(new Object[] {mb, 1, 1 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// when the rows length is larger than overflowing the character value,
 		// the run gets split into two
 		// char overflows into the next position increasing size by 1 char.
 		int charMax = Character.MAX_VALUE;
 		mb = CompressibleInputGenerator.getInput(charMax, 1, CompressionType.RLE, 1, 1.0, 132);
-		tests.add(new Object[] {mb, 1, 1 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(charMax + 1, 1, CompressionType.RLE, 1, 1.0, 132);
-		tests.add(new Object[] {mb, 1, 2 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(charMax * 2 + 1, 1, CompressionType.RLE, 1, 1.0, 132);
-		tests.add(new Object[] {mb, 1, 3 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// 10 unique values ordered such that all 10 instances is in the same run.
 		// Results in same size no matter the number of original rows.
 		mb = CompressibleInputGenerator.getInput(100, 1, CompressionType.RLE, 10, 1.0, 1);
-		tests.add(new Object[] {mb, 8, 8 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(1000, 1, CompressionType.RLE, 10, 1.0, 1312);
-		tests.add(new Object[] {mb, 10, 10 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(10000, 1, CompressionType.RLE, 10, 1.0, 14512);
-		tests.add(new Object[] {mb, 10, 10 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(100000, 1, CompressionType.RLE, 10, 1.0, 132);
-		tests.add(new Object[] {mb, 10, 12 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Sparse Dream inputs.
 		mb = CompressibleInputGenerator.getInput(100, 1, CompressionType.RLE, 10, 0.1, 1);
-		tests.add(new Object[] {mb, 8, 8*2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(1000, 1, CompressionType.RLE, 10, 0.1, 1312);
-		tests.add(new Object[] {mb, 10, 10 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(10000, 1, CompressionType.RLE, 10, 0.1, 14512);
-		tests.add(new Object[] {mb, 10, 10 * 2, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(100000, 1, CompressionType.RLE, 10, 0.1, 132);
-		tests.add(new Object[] {mb, 10, 24, 0});
+		tests.add(new Object[] {mb, 0});
 		mb = CompressibleInputGenerator.getInput(1000000, 1, CompressionType.RLE, 10, 0.1, 132);
-		tests.add(new Object[] {mb, 10, 134, 0});
+		tests.add(new Object[] {mb, 0});
 
 		mb = CompressibleInputGenerator.getInput(1000000, 1, CompressionType.RLE, 1, 1.0, 132);
-		tests.add(new Object[] {mb, 1, 32, 0});
+		tests.add(new Object[] {mb, 0});
 
 		// Multi Column
-		// two identical columns 
+		// two identical columns
 		mb = CompressibleInputGenerator.getInput(10, 2, CompressionType.RLE, 2, 1.0, 132);
-		tests.add(new Object[] {mb, 3, 6, 0});
+		tests.add(new Object[] {mb, 0});
 
 		mb = CompressibleInputGenerator.getInput(10, 6, CompressionType.RLE, 2, 1.0, 132);
-		tests.add(new Object[] {mb, 5, 10, 0});
+		tests.add(new Object[] {mb, 0});
 
 		mb = CompressibleInputGenerator.getInput(10, 100, CompressionType.RLE, 2, 1.0, 132);
-		tests.add(new Object[] {mb, 10, 20, 0});
+		tests.add(new Object[] {mb, 0});
 
 		mb = CompressibleInputGenerator.getInput(101, 17, CompressionType.RLE, 2, 1.0, 132);
-		tests.add(new Object[] {mb, 15, 15*2, 0});
+		tests.add(new Object[] {mb, 0});
 
 		mb = CompressibleInputGenerator.getInput(101, 17, CompressionType.RLE, 3, 1.0, 132);
-		tests.add(new Object[] {mb, 31, 62, 0});
+		tests.add(new Object[] {mb, 0});
 
 		return tests;
 	}
 
-	public JolEstimateRLETest(MatrixBlock mb, int numDistinct, int dataListSize, int tolerance) {
-		super(mb,new int[]{numDistinct * mb.getNumRows(), numDistinct + 1, dataListSize}, tolerance);
+	public JolEstimateRLETest(MatrixBlock mb, int tolerance) {
+		super(mb, tolerance);
 	}
 
 	@Override
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
index 120eb86..25db7d5 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
@@ -21,29 +21,22 @@
 
 import static org.junit.Assert.assertTrue;
 
-import java.util.ArrayList;
-import java.util.List;
+import java.util.EnumSet;
 
-import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.compress.BitmapEncoder;
 import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.CompressionSettingsBuilder;
-import org.apache.sysds.runtime.compress.UncompressedBitmap;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup;
 import org.apache.sysds.runtime.compress.colgroup.ColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimatorFactory;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
+import org.apache.sysds.runtime.compress.utils.AbstractBitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-import org.openjdk.jol.datamodel.X86_64_DataModel;
-import org.openjdk.jol.info.ClassLayout;
-import org.openjdk.jol.layouters.HotSpotLayouter;
-import org.openjdk.jol.layouters.Layouter;
 
 @RunWith(value = Parameterized.class)
 public abstract class JolEstimateTest {
@@ -59,87 +52,35 @@
 	private final long tolerance;
 	private final MatrixBlock mbt;
 	private final CompressionSettings cs;
-	private final int[] sizes;
+	private final CompressionSettings csl;// Compression Settings Lossy;
 	private ColGroup cg;
+	private ColGroup cgl; // ColGroup Lossy;
 
 	public abstract CompressionType getCT();
 
-	public JolEstimateTest(MatrixBlock mb, int[] sizes, int tolerance) {
+	public JolEstimateTest(MatrixBlock mb, int tolerance) {
 		this.mbt = mb;
-		this.sizes = sizes;
 		this.tolerance = tolerance;
-		List<CompressionType> vc = new ArrayList<>();
-		vc.add(getCT());
-		this.cs = new CompressionSettingsBuilder().setSeed(seed).setSamplingRatio(1.0).setValidCompressions(vc).create();
+		EnumSet<CompressionType> vc = EnumSet.of(getCT());
+		CompressionSettingsBuilder csb = new CompressionSettingsBuilder().setSeed(seed).setSamplingRatio(1.0)
+			.setValidCompressions(vc);
+		this.cs = csb.create();
+		this.csl = csb.setLossy(true).setSortValuesByLength(false).create();
 
 		int[] colIndexes = new int[mbt.getNumRows()];
 		for(int x = 0; x < mbt.getNumRows(); x++) {
 			colIndexes[x] = x;
 		}
 		try {
-			UncompressedBitmap ubm = BitmapEncoder.extractBitmap(colIndexes, mbt, cs);
+			AbstractBitmap ubm = BitmapEncoder.extractBitmap(colIndexes, mbt, cs);
 			cg = ColGroupFactory.compress(colIndexes, mbt.getNumColumns(), ubm, getCT(), cs, mbt);
+			AbstractBitmap ubml = BitmapEncoder.extractBitmap(colIndexes, mbt, csl);
+			cgl = ColGroupFactory.compress(colIndexes, mbt.getNumColumns(), ubml, getCT(), csl, mbt);
+
 		}
 		catch(Exception e) {
 			e.printStackTrace();
-			assertTrue("Failed to compress colgroup! " + e.getMessage(), false);
-		}
-	}
-
-	@Test
-	@Ignore //TODO this method is a maintenance obstacle (e.g., why do we expect int arrays in the number of rows?)
-	public void instanceSize() {
-		assertTrue("Failed Test, because ColGroup is null", cg != null);
-		try {
-			Layouter l = new HotSpotLayouter(new X86_64_DataModel());
-			long jolEstimate = 0;
-			long diff;
-			StringBuilder sb = new StringBuilder();
-			Object[] contains;
-			if(cg.getCompType() == ddc) {
-				if(sizes[0] < 256) {
-					contains = new Object[] {cg, new int[mbt.getNumRows()], new double[sizes[0]],
-						new byte[mbt.getNumColumns()]};
-				}
-				else {
-					contains = new Object[] {cg, new int[mbt.getNumRows()], new double[sizes[0]],
-						new char[mbt.getNumColumns()]};
-				}
-			}
-			else if(cg.getCompType() == ole) {
-				contains = new Object[] {cg, new int[mbt.getNumRows()], new double[sizes[0]], new int[sizes[1]],
-					new char[sizes[2]], new int[sizes[3]]};
-			}
-			else if(cg.getCompType() == rle) {
-				contains = new Object[] {cg, new int[mbt.getNumRows()], new double[sizes[0]], new int[sizes[1]],
-					new char[sizes[2]]};
-			}
-			else if(cg.getCompType() == unc) {
-				// Unlike the other tests, in the uncompressed col groups it is assumed that the MatrixBlock default
-				// implementation estimates correctly.
-				// Thereby making this test only fail in cases where the estimation error is located inside the
-				// compression package.
-				jolEstimate += MatrixBlock.estimateSizeInMemory(mbt.getNumColumns(), mbt.getNumRows(), mbt.getSparsity());
-				contains = new Object[] {cg, new int[mbt.getNumRows()]};
-			}
-			else {
-				throw new NotImplementedException("Not Implemented Case for JolEstimate Test");
-			}
-
-			for(Object ob : contains) {
-				ClassLayout cl = ClassLayout.parseInstance(ob, l);
-				diff = cl.instanceSize();
-				jolEstimate += diff;
-				sb.append(ob.getClass());
-				sb.append("  TOTAL MEM: " + jolEstimate + " diff " + diff + "\n");
-			}
-			long estimate = cg.estimateInMemorySize();
-			String errorMessage = " estimate " + estimate + " should be equal to JOL " + jolEstimate + "\n";
-			assertTrue(errorMessage + sb.toString() + "\n" + cg.toString(), estimate == jolEstimate);
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			assertTrue("Failed Test: " + e.getMessage(), false);
+			assertTrue("Failed to compress colGroup! " + e.getMessage(), false);
 		}
 	}
 
@@ -167,25 +108,49 @@
 		}
 	}
 
+	@Test
+	public void compressedSizeInfoEstimatorExactLossy() {
+		try {
+			// CompressionSettings cs = new CompressionSettings(1.0);
+			CompressedSizeEstimator cse = CompressedSizeEstimatorFactory.getSizeEstimator(mbt, csl);
+			CompressedSizeInfoColGroup csi = cse.estimateCompressedColGroupSize();
+			long estimateCSI = csi.getCompressionSize(getCT());
+			long estimateObject = cgl.estimateInMemorySize();
+			String errorMessage = "CSI estimate " + estimateCSI + " should be exactly " + estimateObject + "\n"
+				+ cg.toString();
+			boolean res = Math.abs(estimateCSI - estimateObject) <= tolerance;
+			if(res && !(estimateCSI == estimateObject)) {
+				// Make a warning in case that it is not exactly the same.
+				// even if the test allows some tolerance.
+				System.out.println("NOT EXACTLY THE SAME! " + this.getClass().getName() + " " + errorMessage);
+			}
+			assertTrue(errorMessage, res);
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			assertTrue("Failed Test", false);
+		}
+	}
+
 	// @Test
 	// public void compressedSizeInfoEstimatorSampler() {
-	// 	try {
-	// 		CompressionSettings cs = new CompressionSettingsBuilder().copySettings(this.cs).setSamplingRatio(0.1).create();
-	// 		CompressedSizeEstimator cse = CompressedSizeEstimatorFactory.getSizeEstimator(mbt, cs);
-	// 		CompressedSizeInfoColGroup csi = cse.computeCompressedSizeInfos(1).compressionInfo[0];
-	// 		long estimateCSI = csi.getCompressionSize(getCT());
-	// 		long estimateObject = cg.estimateInMemorySize();
-	// 		String errorMessage = "CSI Sampled estimate " + estimateCSI + " should be larger than actual "
-	// 			+ estimateObject + " but not more than " + (tolerance + kbTolerance) + " off";
-	// 		if(!(estimateCSI == estimateObject)) {
-	// 			System.out.println("NOT EXACTLY THE SAME IN SAMPLING! " + errorMessage);
-	// 		}
-	// 		boolean res = Math.abs(estimateCSI - estimateObject) <= tolerance + kbTolerance;
-	// 		assertTrue(errorMessage, res);
-	// 	}
-	// 	catch(Exception e) {
-	// 		e.printStackTrace();
-	// 		assertTrue("Failed Test", false);
-	// 	}
+	// try {
+	// CompressionSettings cs = new CompressionSettingsBuilder().copySettings(this.cs).setSamplingRatio(0.1).create();
+	// CompressedSizeEstimator cse = CompressedSizeEstimatorFactory.getSizeEstimator(mbt, cs);
+	// CompressedSizeInfoColGroup csi = cse.computeCompressedSizeInfos(1).compressionInfo[0];
+	// long estimateCSI = csi.getCompressionSize(getCT());
+	// long estimateObject = cg.estimateInMemorySize();
+	// String errorMessage = "CSI Sampled estimate " + estimateCSI + " should be larger than actual "
+	// + estimateObject + " but not more than " + (tolerance + kbTolerance) + " off";
+	// if(!(estimateCSI == estimateObject)) {
+	// System.out.println("NOT EXACTLY THE SAME IN SAMPLING! " + errorMessage);
+	// }
+	// boolean res = Math.abs(estimateCSI - estimateObject) <= tolerance + kbTolerance;
+	// assertTrue(errorMessage, res);
+	// }
+	// catch(Exception e) {
+	// e.printStackTrace();
+	// assertTrue("Failed Test", false);
+	// }
 	// }
 }
\ No newline at end of file
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTestEmpty.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTestEmpty.java
deleted file mode 100644
index 57053aa..0000000
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTestEmpty.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysds.test.component.compress.colgroup;
-
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.Collection;
-
-import org.apache.sysds.runtime.compress.colgroup.ColGroup;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC1;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupDDC2;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupOLE;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupOffset;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupRLE;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupSizes;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed;
-import org.apache.sysds.runtime.compress.colgroup.ColGroupValue;
-import org.apache.sysds.runtime.compress.colgroup.Dictionary;
-import org.apache.sysds.runtime.data.DenseBlockFP64;
-import org.apache.sysds.runtime.matrix.data.MatrixBlock;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-import org.openjdk.jol.datamodel.X86_64_DataModel;
-import org.openjdk.jol.info.ClassLayout;
-import org.openjdk.jol.info.FieldLayout;
-import org.openjdk.jol.layouters.HotSpotLayouter;
-import org.openjdk.jol.layouters.Layouter;
-
-@RunWith(value = Parameterized.class)
-public class JolEstimateTestEmpty {
-
-	@Parameters
-	public static Collection<Object[]> data() {
-		ArrayList<Object[]> tests = new ArrayList<>();
-
-		// Only add a single selected test of constructor with no compression
-		tests.add(new Object[] {ColGroupUncompressed.class});
-		tests.add(new Object[] {ColGroup.class});
-		tests.add(new Object[] {ColGroupValue.class});
-		tests.add(new Object[] {ColGroupOLE.class});
-		tests.add(new Object[] {ColGroupDDC.class});
-		tests.add(new Object[] {ColGroupDDC1.class});
-		tests.add(new Object[] {ColGroupDDC2.class});
-		tests.add(new Object[] {ColGroupRLE.class});
-		tests.add(new Object[] {ColGroupOffset.class});
-
-		return tests;
-	}
-
-	protected final Class<?> colGroupClass;
-	private Layouter l;
-
-	public JolEstimateTestEmpty(Class<?> colGroupClass) {
-		this.colGroupClass = colGroupClass;
-	}
-
-	@Test
-	public void estimate() {
-		try {
-			long estimate = ColGroupSizes.getEmptyMemoryFootprint(colGroupClass);
-			long jolEstimate = getWorstCaseMemory(colGroupClass);
-			assertTrue(
-				"Memory Estimate of " + estimate + " Incorrect compared to " + jolEstimate + "\n"
-					+ printWorstCaseMemoryEstimate(colGroupClass),
-				estimate == jolEstimate);
-		}
-		catch(Exception e) {
-			e.printStackTrace();
-			assertTrue("Test Failed, " + e.getMessage(), false);
-		}
-	}
-
-	private String printWorstCaseMemoryEstimate(Class<?> klass) {
-		StringBuilder sb = new StringBuilder();
-		l = new HotSpotLayouter(new X86_64_DataModel());
-		sb.append("***** " + l);
-		sb.append(ClassLayout.parseClass(klass, l).toPrintable());
-		for(FieldLayout fl : ClassLayout.parseClass(klass, l).fields()) {
-			if(fl.typeClass() == "org.apache.sysds.runtime.matrix.data.MatrixBlock") {
-				sb.append(ClassLayout.parseClass(MatrixBlock.class, l).toPrintable());
-				sb.append(ClassLayout.parseClass(DenseBlockFP64.class, l).toPrintable());
-			}
-		}
-		return sb.toString();
-	}
-
-	private long getWorstCaseMemory(Class<?> klass) {
-		l = new HotSpotLayouter(new X86_64_DataModel());
-		long size = ClassLayout.parseClass(klass, l).instanceSize();
-
-		for(FieldLayout fl : ClassLayout.parseClass(klass, l).fields()) {
-			// If the type of filed is an Array, then add the cost of having such a thing.
-			if(fl.typeClass().contains("[]")) {
-				size += 20;
-				size += 4;
-			}
-			if(fl.typeClass().equals(MatrixBlock.class.getName())) {
-				size += MatrixBlock.estimateSizeDenseInMemory(0, 0);
-			}
-			else if(fl.typeClass().equals(Dictionary.class.getName())) {
-				size += getWorstCaseMemory(Dictionary.class);
-			}
-		}
-
-		return size;
-	}
-
-}
\ No newline at end of file
diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateUncompressedTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateUncompressedTest.java
index 185c868..b170ec9 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateUncompressedTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateUncompressedTest.java
@@ -49,15 +49,12 @@
 		mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(1, 100000, 0, 100, 0.01, 7)));
 
 		// Multi column
-		mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(2, 10, 0, 100, 1.0, 7)));
-		mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(13, 100, 0, 100, 1.0, 7)));
+		// TODO Fix uncompressed columns in lossy situation
+		// mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(2, 10, 0, 100, 1.0, 7)));
+		// mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(13, 100, 0, 100, 1.0, 7)));
 
 		// sparse
-		// TODO: Currently it is assumed not to be sparse.
-		// But is should be possible to contain a sparse matrix block inside the ColGroups, and compare compression
-		// rates to that. The Main Issue is that the compression ratio then should still be calculated from the
-		// assumption of a dense representation, but the compression ratio achieved by sparse representations should be
-		// included.
+
 		// mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(13, 100, 0, 100, 0.3, 7)));
 		// mb.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(100, 100, 0, 100, 0.01, 7)));
 
@@ -69,7 +66,7 @@
 	}
 
 	public JolEstimateUncompressedTest(MatrixBlock mb) {
-		super(mb, new int[0], 0);
+		super(mb, 0);
 	}
 
 	@Override
diff --git a/src/test/java/org/apache/sysds/test/component/matrix/CountDistinctTest.java b/src/test/java/org/apache/sysds/test/component/matrix/CountDistinctTest.java
index a8e3e2b..038ce4a 100644
--- a/src/test/java/org/apache/sysds/test/component/matrix/CountDistinctTest.java
+++ b/src/test/java/org/apache/sysds/test/component/matrix/CountDistinctTest.java
@@ -44,10 +44,7 @@
 
 	private static CountDistinctTypes[] esT = new CountDistinctTypes[] {
 		// The different types of Estimators
-		CountDistinctTypes.COUNT, 
-		CountDistinctTypes.KMV,
-		CountDistinctTypes.HLL
-	};
+		CountDistinctTypes.COUNT, CountDistinctTypes.KMV, CountDistinctTypes.HLL};
 
 	@Parameters
 	public static Collection<Object[]> data() {
@@ -67,9 +64,9 @@
 
 		// Sparse Multicol random values (most likely each value is unique)
 		inputs.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(100, 10, 0.0, 100.0, 0.1, 7)));
-		actualUnique.add(98L); //dense representation
+		actualUnique.add(98L); // dense representation
 		inputs.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrix(100, 1000, 0.0, 100.0, 0.1, 7)));
-		actualUnique.add(9823L+1); //sparse representation
+		actualUnique.add(9823L + 1); // sparse representation
 
 		// MultiCol Inputs (using integers)
 		inputs.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrixIntV(5000, 5000, 1, 100, 1, 8)));
@@ -88,10 +85,6 @@
 		// Sparse Inputs
 		inputs.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrixIntV(1024, 10241, 0, 3000, 0.1, 7)));
 		actualUnique.add(3000L);
-		// inputs.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrixIntV(10240, 10241, 0, 5000, 0.1, 7)));
-		// actualUnique.add(5000L);
-		// inputs.add(DataConverter.convertToMatrixBlock(TestUtils.generateTestMatrixIntV(10240, 10241, 0, 10000, 0.1, 7)));
-		// actualUnique.add(10000L);
 
 		for(CountDistinctTypes et : esT) {
 			for(HashType ht : HashType.values()) {
@@ -105,14 +98,14 @@
 					tests.add(new Object[] {et, inputs.get(0), actualUnique.get(0), ht, NotImplementedException.class,
 						"HyperLogLog not implemented", 0.0});
 				}
-				else if (et != CountDistinctTypes.COUNT) {
+				else if(et != CountDistinctTypes.COUNT) {
 					for(int i = 0; i < inputs.size(); i++) {
 						// allowing the estimate to be 15% off
 						tests.add(new Object[] {et, inputs.get(i), actualUnique.get(i), ht, null, null, 0.15});
 					}
 				}
 			}
-			if (et == CountDistinctTypes.COUNT){
+			if(et == CountDistinctTypes.COUNT) {
 				for(int i = 0; i < inputs.size(); i++) {
 					tests.add(new Object[] {et, inputs.get(i), actualUnique.get(i), null, null, null, 0.0001});
 				}
@@ -180,7 +173,7 @@
 	public String toString() {
 		StringBuilder sb = new StringBuilder();
 		sb.append(et);
-		if(ht != null){
+		if(ht != null) {
 			sb.append("-" + ht);
 		}
 		sb.append("  nrUnique:" + nrUnique);
diff --git a/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinct.java b/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinct.java
index 74772e0..e5872e9 100644
--- a/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinct.java
+++ b/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinct.java
@@ -44,6 +44,6 @@
 	public void testSimple1by1() {
 		// test simple 1 by 1.
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
-		countDistinctTest(1, 1, 1, ex, 0.00001);
+		countDistinctTest(1, 1, 1, 1.0, ex, 0.00001);
 	}
 }
\ No newline at end of file
diff --git a/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctApprox.java b/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctApprox.java
index 8d0d242..18193ca 100644
--- a/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctApprox.java
+++ b/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctApprox.java
@@ -28,7 +28,7 @@
 	private final static String TEST_DIR = "functions/countDistinct/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + CountDistinctApprox.class.getSimpleName() + "/";
 
-	public CountDistinctApprox(){
+	public CountDistinctApprox() {
 		percentTolerance = 0.1;
 	}
 
@@ -36,7 +36,14 @@
 	public void testXXLarge() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
 		double tolerance = 9000 * percentTolerance;
-		countDistinctTest(9000, 10000, 5000, ex, tolerance);
+		countDistinctTest(9000, 10000, 5000, 0.1, ex, tolerance);
+	}
+
+	@Test
+	public void testSparse500Unique(){
+		LopProperties.ExecType ex = LopProperties.ExecType.CP;
+		double tolerance = 0.00001 + 120 * percentTolerance;
+		countDistinctTest(500, 100, 100000, 0.1, ex, tolerance);
 	}
 
 	@Override
diff --git a/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctBase.java b/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctBase.java
index 6a9b096..9f797ca 100644
--- a/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctBase.java
+++ b/src/test/java/org/apache/sysds/test/functions/countDistinct/CountDistinctBase.java
@@ -31,13 +31,16 @@
 public abstract class CountDistinctBase extends AutomatedTestBase {
 
 	protected abstract String getTestClassDir();
+
 	protected abstract String getTestName();
+
 	protected abstract String getTestDir();
 
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		addTestConfiguration(getTestName(), new TestConfiguration(getTestClassDir(), getTestName(), new String[] { "A.scalar" }));
+		addTestConfiguration(getTestName(),
+			new TestConfiguration(getTestClassDir(), getTestName(), new String[] {"A.scalar"}));
 	}
 
 	protected double percentTolerance = 0.0;
@@ -46,46 +49,61 @@
 	@Test
 	public void testSmall() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
-		double tolerance = baseTolerance  + 50 *  percentTolerance;
-		countDistinctTest(50, 50, 50, ex,tolerance);
+		double tolerance = baseTolerance + 50 * percentTolerance;
+		countDistinctTest(50, 50, 50, 1.0, ex, tolerance);
 	}
 
 	@Test
 	public void testLarge() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
-		double tolerance = baseTolerance + 800 *  percentTolerance;
-		countDistinctTest(800, 1000, 1000, ex,tolerance);
+		double tolerance = baseTolerance + 800 * percentTolerance;
+		countDistinctTest(800, 1000, 1000, 1.0, ex, tolerance);
 	}
 
 	@Test
 	public void testXLarge() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
-		double tolerance = baseTolerance + 1723 *  percentTolerance;
-		countDistinctTest(1723, 5000, 2000, ex,tolerance);
+		double tolerance = baseTolerance + 1723 * percentTolerance;
+		countDistinctTest(1723, 5000, 2000, 1.0, ex, tolerance);
 	}
 
 	@Test
 	public void test1Unique() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
 		double tolerance = 0.00001;
-		countDistinctTest(1, 100, 1000, ex,tolerance);
+		countDistinctTest(1, 100, 1000, 1.0, ex, tolerance);
 	}
 
 	@Test
 	public void test2Unique() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
 		double tolerance = 0.00001;
-		countDistinctTest(2, 100, 1000, ex,tolerance);
+		countDistinctTest(2, 100, 1000, 1.0, ex, tolerance);
 	}
 
 	@Test
 	public void test120Unique() {
 		LopProperties.ExecType ex = LopProperties.ExecType.CP;
-		double tolerance = 0.00001 + 120 *  percentTolerance;
-		countDistinctTest(120, 100, 1000, ex,tolerance);
+		double tolerance = 0.00001 + 120 * percentTolerance;
+		countDistinctTest(120, 100, 1000, 1.0, ex, tolerance);
 	}
 
-	public void countDistinctTest(int numberDistinct, int cols, int rows, LopProperties.ExecType instType, double tolerance) {
+	@Test
+	public void testSparse500Unique() {
+		LopProperties.ExecType ex = LopProperties.ExecType.CP;
+		double tolerance = 0.00001 + 500 * percentTolerance;
+		countDistinctTest(500, 100, 640000, 0.1, ex, tolerance);
+	}
+
+	@Test
+	public void testSparse120Unique(){
+		LopProperties.ExecType ex = LopProperties.ExecType.CP;
+		double tolerance = 0.00001 + 120 * percentTolerance;
+		countDistinctTest(120, 100, 64000, 0.1, ex, tolerance);
+	}
+
+	public void countDistinctTest(int numberDistinct, int cols, int rows, double sparsity,
+		LopProperties.ExecType instType, double tolerance) {
 		Types.ExecMode platformOld = setExecMode(instType);
 		try {
 			loadTestConfiguration(getTestConfiguration(getTestName()));
@@ -93,16 +111,18 @@
 			fullDMLScriptName = HOME + getTestName() + ".dml";
 			String out = output("A");
 			System.out.println(out);
-			programArgs = new String[] { "-args", String.valueOf(numberDistinct), String.valueOf(rows),
-					String.valueOf(cols), out};
+			programArgs = new String[] {"-args", String.valueOf(numberDistinct), String.valueOf(rows),
+				String.valueOf(cols), String.valueOf(sparsity), out};
 
 			runTest(true, false, null, -1);
 			writeExpectedScalar("A", numberDistinct);
 			compareResults(tolerance);
-		} catch (Exception e) {
+		}
+		catch(Exception e) {
 			e.printStackTrace();
 			assertTrue("Exception in execution: " + e.getMessage(), false);
-		} finally {
+		}
+		finally {
 			rtplatform = platformOld;
 		}
 	}
diff --git a/src/test/scripts/functions/countDistinct/countDistinct.dml b/src/test/scripts/functions/countDistinct/countDistinct.dml
index a12ffe2..a0da780 100644
--- a/src/test/scripts/functions/countDistinct/countDistinct.dml
+++ b/src/test/scripts/functions/countDistinct/countDistinct.dml
@@ -19,6 +19,7 @@
 #
 #-------------------------------------------------------------
 
-input = round(rand(rows = $2, cols = $3, min = 0, max = $1 -1, seed = 7))
+input = round(rand(rows = $2, cols = $3, min = 0, max = $1 -1, sparsity= $4,  seed = 7))
 res = countDistinct(input)
-write(res, $4, format="text")
+print(res)
+write(res, $5, format="text")
diff --git a/src/test/scripts/functions/countDistinct/countDistinctApprox.dml b/src/test/scripts/functions/countDistinct/countDistinctApprox.dml
index e8b964e..eeb5bfc 100644
--- a/src/test/scripts/functions/countDistinct/countDistinctApprox.dml
+++ b/src/test/scripts/functions/countDistinct/countDistinctApprox.dml
@@ -19,6 +19,6 @@
 #
 #-------------------------------------------------------------
 
-input = round(rand(rows = $2, cols = $3, min = 0, max = $1 -1, seed = 7))
+input = round(rand(rows = $2, cols = $3, min = 0, max = $1 -1, sparsity= $4, seed = 7))
 res = countDistinctApprox(input)
-write(res, $4, format="text")
\ No newline at end of file
+write(res, $5, format="text")
\ No newline at end of file