blob: 4bc85ae5a1a1b93d238cb436c58a3e746710506c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import static org.apache.datasketches.QuickSelect.selectExcludingZeros;
import static org.apache.datasketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
import static org.apache.datasketches.theta.PreambleUtil.insertCurCount;
import static org.apache.datasketches.theta.PreambleUtil.insertLgArrLongs;
import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong;
import org.apache.datasketches.HashOperations;
import org.apache.datasketches.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
* Handles common resize, rebuild and move operations.
* The Memory based operations assume a specific data structure that is unique to the theta sketches.
*
* @author Lee Rhodes
*/
final class Rebuilder {
private Rebuilder() {}
/**
* Rebuild the hashTable in the given Memory at its current size. Changes theta and thus count.
* This assumes a Memory preamble of standard form with correct values of curCount and thetaLong.
* ThetaLong and curCount will change.
* Afterwards, caller must update local class members curCount and thetaLong from Memory.
*
* @param mem the Memory the given Memory
* @param preambleLongs size of preamble in longs
* @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch
*/
static final void quickSelectAndRebuild(final WritableMemory mem, final int preambleLongs,
final int lgNomLongs) {
//Note: This copies the Memory data onto the heap and then at the end copies the result
// back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing,
// and the internal loops would be slower. The bulk copies are performed at a low level and
// are quite fast. Measurements reveal that we are not paying much of a penalty.
//Pull data into tmp arr for QS algo
final int lgArrLongs = extractLgArrLongs(mem);
final int curCount = extractCurCount(mem);
final int arrLongs = 1 << lgArrLongs;
final long[] tmpArr = new long[arrLongs];
final int preBytes = preambleLongs << 3;
mem.getLongArray(preBytes, tmpArr, 0, arrLongs); //copy mem data to tmpArr
//Do the QuickSelect on a tmp arr to create new thetaLong
final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS
final long newThetaLong = selectExcludingZeros(tmpArr, curCount, pivot);
insertThetaLong(mem, newThetaLong); //UPDATE thetalong
//Rebuild to clean up dirty data, update count
final long[] tgtArr = new long[arrLongs];
final int newCurCount =
HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong);
insertCurCount(mem, newCurCount); //UPDATE curCount
//put the rebuilt array back into memory
mem.putLongArray(preBytes, tgtArr, 0, arrLongs);
}
/**
* Moves me (the entire sketch) to a new larger Memory location and rebuilds the hash table.
* This assumes a Memory preamble of standard form with the correct value of thetaLong.
* Afterwards, the caller must update the local Memory reference, lgArrLongs
* and hashTableThreshold from the dstMemory and free the source Memory.
*
* @param srcMem the source Memory
* @param preambleLongs size of preamble in longs
* @param srcLgArrLongs size (log_base2) of source hash table
* @param dstMem the destination Memory, which may be garbage
* @param dstLgArrLongs the destination hash table target size
* @param thetaLong theta as a long
*/
static final void moveAndResize(final Memory srcMem, final int preambleLongs,
final int srcLgArrLongs, final WritableMemory dstMem, final int dstLgArrLongs, final long thetaLong) {
//Note: This copies the Memory data onto the heap and then at the end copies the result
// back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing,
// and the internal loops would be slower. The bulk copies are performed at a low level and
// are quite fast. Measurements reveal that we are not paying much of a penalty.
//Move Preamble to destination memory
final int preBytes = preambleLongs << 3;
srcMem.copyTo(0, dstMem, 0, preBytes); //copy the preamble
//Bulk copy source to on-heap buffer
final int srcHTLen = 1 << srcLgArrLongs;
final long[] srcHTArr = new long[srcHTLen];
srcMem.getLongArray(preBytes, srcHTArr, 0, srcHTLen);
//Create destination buffer
final int dstHTLen = 1 << dstLgArrLongs;
final long[] dstHTArr = new long[dstHTLen];
//Rebuild hash table in destination buffer
HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong);
//Bulk copy to destination memory
dstMem.putLongArray(preBytes, dstHTArr, 0, dstHTLen);
dstMem.putByte(LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update in dstMem
}
/**
* Resizes existing hash array into a larger one within a single Memory assuming enough space.
* This assumes a Memory preamble of standard form with the correct value of thetaLong.
* The Memory lgArrLongs will change.
* Afterwards, the caller must update local copies of lgArrLongs and hashTableThreshold from
* Memory.
*
* @param mem the Memory
* @param preambleLongs the size of the preamble in longs
* @param srcLgArrLongs the size of the source hash table
* @param tgtLgArrLongs the LgArrLongs value for the new hash table
*/
static final void resize(final WritableMemory mem, final int preambleLongs,
final int srcLgArrLongs, final int tgtLgArrLongs) {
//Note: This copies the Memory data onto the heap and then at the end copies the result
// back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing,
// and the internal loops would be slower. The bulk copies are performed at a low level and
// are quite fast. Measurements reveal that we are not paying much of a penalty.
//Preamble stays in place
final int preBytes = preambleLongs << 3;
//Bulk copy source to on-heap buffer
final int srcHTLen = 1 << srcLgArrLongs; //current value
final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer
mem.getLongArray(preBytes, srcHTArr, 0, srcHTLen);
//Create destination on-heap buffer
final int dstHTLen = 1 << tgtLgArrLongs;
final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer
//Rebuild hash table in destination buffer
final long thetaLong = extractThetaLong(mem);
HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, thetaLong);
//Bulk copy to destination memory
mem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); //put it back, no need to clear
insertLgArrLongs(mem, tgtLgArrLongs); //update in mem
}
/**
* Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be
* an integer value between zero and the given lgRF, inclusive;
* @param capBytes the current memory capacity in bytes
* @param lgArrLongs the current lg hash table size in longs
* @param preLongs the current preamble size in longs
* @param lgRF the configured lg Resize Factor
* @return the actual log2 Resize Factor that can be used to grow the hash table
*/
static final int actLgResizeFactor(final long capBytes, final int lgArrLongs, final int preLongs,
final int lgRF) {
final int maxHTLongs = Util.floorPowerOf2(((int)(capBytes >>> 3) - preLongs));
final int lgFactor = Math.max(Integer.numberOfTrailingZeros(maxHTLongs) - lgArrLongs, 0);
return (lgFactor >= lgRF) ? lgRF : lgFactor;
}
}