blob: 9da0b5639fbf1337be1e6fdfd28d181fe95ce554 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash;
import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
import org.apache.datasketches.memory.Memory;
/**
* Used to convert older serialization versions 1 and 2 to version 3. The Serialization
* Version is the version of the sketch binary image format and should not be confused with the
* version number of the Open Source DataSketches Library.
*
* @author Lee Rhodes
*/
final class ForwardCompatibility {

  /**
   * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch.
   * Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored
   * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will
   * be converted to SerVer 3 sketches. There is no concept of p-sampling, no empty bit.
   *
   * <p>This overload computes the seed hash from the given seed and delegates to
   * {@link #heapify1to3(Memory, short)}.</p>
   *
   * @param srcMem the image of a SerVer 1 sketch
   *
   * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
   * The seed used for building the sketch image in srcMem.
   * Note: SerVer 1 sketches do not have the concept of the SeedHash, so the seed provided here
   * MUST be the actual seed that was used when the SerVer 1 sketches were built.
   * @return a SerVer 3 {@link CompactSketch}.
   * @throws SketchesArgumentException if the image fails the checks described in
   * {@link #heapify1to3(Memory, short)}.
   */
  static final CompactSketch heapify1to3(final Memory srcMem, final long seed) {
    final short seedHash = Util.computeSeedHash(seed);
    return heapify1to3(srcMem, seedHash);
  }

  /**
   * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch.
   * Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored
   * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will
   * be converted to SerVer 3 sketches. There is no concept of p-sampling, no empty bit.
   *
   * @param srcMem the image of a SerVer 1 sketch
   *
   * @param seedHash <a href="{@docRoot}/resources/dictionary.html#seedHash">See Seed Hash</a>.
   * The seedHash that matches the seedHash of the original seed used to construct the sketch.
   * Note: SerVer 1 sketches do not have the concept of the SeedHash, so the seedHash provided here
   * MUST be derived from the actual seed that was used when the SerVer 1 sketches were built.
   * @return a SerVer 3 {@link CompactSketch}.
   * @throws SketchesArgumentException if preamble-longs is not 3, if the family ID is not in
   * the range 1 to 3, if the stored entry count is negative, or if srcMem is too small to hold
   * the number of entries it claims to contain.
   */
  static final CompactSketch heapify1to3(final Memory srcMem, final short seedHash) {
    //Kept as a long: narrowing to int would corrupt the size checks for regions over 2 GiB.
    final long memCap = srcMem.getCapacity();
    final int preLongs = extractPreLongs(srcMem); //always 3 for serVer 1
    if (preLongs != 3) {
      throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs);
    }
    final int familyId = extractFamilyID(srcMem); //1,2,3
    if ((familyId < 1) || (familyId > 3)) {
      throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 3: " + familyId);
    }
    final int curCount = extractCurCount(srcMem);
    final long thetaLong = extractThetaLong(srcMem);
    final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);
    //NOTE(review): an image with no room for data beyond the 24-byte preamble is treated as
    // empty regardless of its stored count. This is the original behavior and is kept as-is.
    if (empty || (memCap <= 24)) { //return empty
      return EmptyCompactSketch.getInstance();
    }
    return heapifyEntries(srcMem, preLongs, curCount, thetaLong, seedHash, memCap);
  }

  /**
   * Convert a serialization version (SerVer) 2 sketch to a SerVer 3 {@link CompactSketch}.
   * Note: SerVer 2 sketches can have metadata-longs of 1,2 or 3 and are always stored
   * in a compact ordered form (not as a hash table), but with 4 different sketch types.
   *
   * <p>The result is the empty sketch instance when preamble-longs is 1 or when the count is
   * zero and theta is 1.0, a single-item sketch when the count is one and theta is 1.0, and an
   * ordered heap compact sketch otherwise. With preamble-longs of 2 no theta is stored and it is
   * taken to be 1.0.</p>
   *
   * @param srcMem the image of a SerVer 2 sketch
   * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
   * The seed used for building the sketch image in srcMem
   * @return a SerVer 3 {@link CompactSketch}
   * @throws SketchesArgumentException if the family ID is not in the range 1 to 4, if
   * preamble-longs is not 1, 2 or 3, if the stored entry count is negative, or if srcMem is too
   * small for its preamble or for the number of entries it claims to contain. The seed check
   * performed by {@code checkMemorySeedHash} may also throw; see that method.
   */
  static final CompactSketch heapify2to3(final Memory srcMem, final long seed) {
    //presumably verifies that the seed hash stored in srcMem matches the given seed;
    // see PreambleUtil.checkMemorySeedHash
    final short seedHash = checkMemorySeedHash(srcMem, seed);
    //Kept as a long: narrowing to int would corrupt the size checks for regions over 2 GiB.
    final long memCap = srcMem.getCapacity();
    final int preLongs = extractPreLongs(srcMem); //1,2 or 3
    final int familyId = extractFamilyID(srcMem); //1,2,3,4
    if ((familyId < 1) || (familyId > 4)) {
      throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId);
    }
    if ((preLongs < 1) || (preLongs > 3)) {
      throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs);
    }
    //The preamble itself must fit before any field beyond the first long is read.
    validateInputSize(preLongs << 3, memCap);
    if (preLongs == 1) { //pre0 only: no count, no theta
      return EmptyCompactSketch.getInstance();
    }
    final int curCount = extractCurCount(srcMem);
    //preLongs == 2: pre0 + count, no theta (== 1.0); preLongs == 3: pre0 + count + theta
    final long thetaLong = (preLongs == 3) ? extractThetaLong(srcMem) : Long.MAX_VALUE;
    if ((curCount == 0) && (thetaLong == Long.MAX_VALUE)) {
      return EmptyCompactSketch.getInstance();
    }
    return heapifyEntries(srcMem, preLongs, curCount, thetaLong, seedHash, memCap);
  }

  /**
   * Builds the non-empty result shared by the SerVer 1 and SerVer 2 converters: validates the
   * stored entry count against the capacity of srcMem, then reads the hash entries that follow
   * the preamble.
   *
   * @param srcMem the source image
   * @param preLongs the number of preamble longs; the entries start at byte offset preLongs * 8
   * @param curCount the entry count read from the image
   * @param thetaLong the theta value to give the result
   * @param seedHash the seed hash to give the result
   * @param memCap the capacity of srcMem in bytes
   * @return a single-item sketch when curCount is 1 and thetaLong is Long.MAX_VALUE,
   * otherwise an ordered heap compact sketch holding curCount entries.
   * @throws SketchesArgumentException if curCount is negative or srcMem is too small.
   */
  private static CompactSketch heapifyEntries(final Memory srcMem, final int preLongs,
      final int curCount, final long thetaLong, final short seedHash, final long memCap) {
    if (curCount < 0) { //would otherwise surface as a NegativeArraySizeException
      throw new SketchesArgumentException(
          "Corrupted sketch image, retained entries count is negative: " + curCount);
    }
    //Computed as a long so that a corrupted, very large count cannot wrap around to a small
    // value, pass the size check, and then trigger a huge array allocation.
    final long reqBytesIn = ((long) curCount + preLongs) << 3;
    validateInputSize(reqBytesIn, memCap);
    final int entriesOffsetBytes = preLongs << 3;
    if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) {
      final long hash = srcMem.getLong(entriesOffsetBytes);
      return new SingleItemSketch(hash, seedHash);
    }
    //theta < 1.0 and/or curCount != 1
    final long[] compactOrderedCache = new long[curCount];
    srcMem.getLongArray(entriesOffsetBytes, compactOrderedCache, 0, curCount);
    return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true);
  }

  /**
   * Checks that the source holds at least the required number of bytes.
   *
   * @param reqBytesIn the number of bytes the image must contain
   * @param memCap the number of bytes actually available
   * @throws SketchesArgumentException if reqBytesIn is greater than memCap.
   */
  private static final void validateInputSize(final long reqBytesIn, final long memCap) {
    if (reqBytesIn > memCap) {
      throw new SketchesArgumentException(
        "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn
        + ", bytesIn: " + memCap);
    }
  }
}