blob: b79fa85fae14e06d19fa5f510cae877f72d83f41 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.keygenerator.mdkey;
/**
* It compresses the data as per max cardinality. It takes only the required bits for each key.
*/
public class NumberCompressor {
/**
* Bits MAX_LENGTH
*/
private static final int MAX_LENGTH = 63;
private static final int LONG_LENGTH = 64;
private static final int BYTE_LENGTH = 8;
/**
* LONG_MAX.
*/
private static final long LONG_MAX = 0x7fffffffffffffffL;
private byte bitsLength;
public NumberCompressor(int cardinality) {
bitsLength = (byte) Long.toBinaryString(cardinality).length();
}
public byte[] compress(int[] keys) {
int[] sizes = getWordsAndByteSize(keys.length);
long[] words = get(keys, sizes[0]);
return getByteValues(sizes, words);
}
private byte[] getByteValues(int[] sizes, long[] words) {
byte[] bytes = new byte[sizes[1]];
int l = sizes[1] - 1;
for (int i = 0; i < words.length; i++) {
long val = words[i];
for (int j = BYTE_LENGTH - 1; j > 0 && l > 0; j--) {
bytes[l] = (byte) val;
val >>>= 8;
l--;
}
bytes[l] = (byte) val;
l--;
}
return bytes;
}
protected long[] get(int[] keys, int wsize) {
long[] words = new long[wsize];
int ll = 0;
int index = 0;
int pos = 0;
int nextIndex = 0;
for (int i = keys.length - 1; i >= 0; i--) {
long val = keys[i];
index = ll >> 6; // divide by 64 to get the new word index
pos = ll & 0x3f; // to ignore sign bit and consider the remaining
// val = val & controlBits;
long mask = (val << pos);
long word = words[index];
words[index] = (word | mask);
ll += bitsLength;
nextIndex = ll >> 6; // This is divide by 64
if (nextIndex != index) {
int consideredBits = bitsLength - ll & 0x3f;
// Check for spill over only if all the bits are not considered
if (consideredBits < bitsLength) {
// Check for spill over
mask = (val >> (bitsLength - ll & 0x3f));
words[nextIndex] |= mask;
}
}
}
return words;
}
public int[] unCompress(byte[] key, int offset, int length) {
int arrayLength = (length * BYTE_LENGTH) / bitsLength;
long[] words = new long[getWordsSizeFromBytesSize(length)];
unCompressVal(key, length, words, offset);
return getArray(words, arrayLength);
}
private void unCompressVal(byte[] key, int ls, long[] words, int offset) {
for (int i = 0; i < words.length; i++) {
long l = 0;
ls -= BYTE_LENGTH;
int m = 0;
if (ls < 0) {
m = ls + BYTE_LENGTH;
ls = 0;
} else {
m = ls + BYTE_LENGTH;
}
for (int j = ls; j < m; j++) {
l <<= BYTE_LENGTH;
l ^= key[offset + j] & 0xFF;
}
words[i] = l;
}
}
private int[] getArray(long[] words, int arrayLength) {
int[] vals = new int[arrayLength];
int ll = 0;
long globalMask = LONG_MAX >>> (MAX_LENGTH - bitsLength);
for (int i = arrayLength - 1; i >= 0; i--) {
int index = ll >> 6;
int pos = ll & 0x3f;
long val = words[index];
long mask = globalMask << pos;
long value = (val & mask) >>> pos;
ll += bitsLength;
int nextIndex = ll >> 6;
if (nextIndex != index) {
pos = ll & 0x3f;
// Number of bits pending for current key is zero, no spill over
if (pos != 0) {
mask = (LONG_MAX >>> (MAX_LENGTH - pos));
val = words[nextIndex];
value = value | ((val & mask) << (bitsLength - pos));
}
}
vals[i] = (int) value;
}
return vals;
}
private int[] getWordsAndByteSize(int arrayLength) {
int length = arrayLength * bitsLength;
int wsize = length / LONG_LENGTH;
int byteSize = length / BYTE_LENGTH;
if (length % LONG_LENGTH != 0) {
wsize++;
}
if (length % BYTE_LENGTH != 0) {
byteSize++;
}
return new int[] { wsize, byteSize };
}
private int getWordsSizeFromBytesSize(int byteSize) {
int wsize = byteSize / BYTE_LENGTH;
if (byteSize % BYTE_LENGTH != 0) {
wsize++;
}
return wsize;
}
}