blob: 1d5f8bc19b4872045d0b3bfd59c14777d0a59b50 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.frequencies;
import static org.apache.datasketches.Util.LS;
import static org.apache.datasketches.Util.zeroPad;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
// @formatter:off
/**
* This class defines the preamble data structure and provides basic utilities for some of the key
* fields.
* <p>
* The intent of the design of this class was to isolate the detailed knowledge of the bit and byte
* layout of the serialized form of the sketches derived from the Sketch class into one place. This
* allows the possibility of the introduction of different serialization schemes with minimal impact
* on the rest of the library.
* </p>
*
* <p>
* MAP: Low significance bytes of this <i>long</i> data structure are on the right. However, the
* multi-byte integers (<i>int</i> and <i>long</i>) are stored in native byte order. The <i>byte</i>
* values are treated as unsigned.
* </p>
*
* <p>
* An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble.
* </p>
*
* <pre>
* * Long || Start Byte Adr:
* Adr:
* || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* 0 ||-------SerDeId-----|-Flags--|-LgCur--| LgMax | FamID | SerVer | PreambleLongs |
* || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
* 1 ||------------(unused)-----------------|--------ActiveItems------------------------|
* || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
* 2 ||-----------------------------------streamLength----------------------------------|
* || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 |
* 3 ||---------------------------------offset------------------------------------------|
* || 39 | 38 | 37 | 36 | 35 | 34 | 33 | 32 |
* 5 ||----------start of values buffer, followed by keys buffer------------------------|
* </pre>
*
* @author Lee Rhodes
*/
final class PreambleUtil {
private PreambleUtil() {}
// ###### DO NOT MESS WITH THIS FROM HERE ...
// Preamble byte Addresses
static final int PREAMBLE_LONGS_BYTE = 0; // either 1 or 4
static final int SER_VER_BYTE = 1;
static final int FAMILY_BYTE = 2;
static final int LG_MAX_MAP_SIZE_BYTE = 3;
static final int LG_CUR_MAP_SIZE_BYTE = 4;
static final int FLAGS_BYTE = 5;
static final int SER_DE_ID_SHORT = 6; // to 7
static final int ACTIVE_ITEMS_INT = 8; // to 11 : 0 to 4 in pre1
static final int STREAMLENGTH_LONG = 16; // to 23 : pre2
static final int OFFSET_LONG = 24; // to 31 : pre3
// flag bit masks
static final int EMPTY_FLAG_MASK = 4;
// Specific values for this implementation
static final int SER_VER = 1;
/**
* Returns a human readable string summary of the preamble state of the given Memory.
* Note: other than making sure that the given Memory size is large
* enough for just the preamble, this does not do much value checking of the contents of the
* preamble as this is primarily a tool for debugging the preamble visually.
*
* @param srcMem the given Memory.
* @return the summary preamble string.
*/
public static String preambleToString(final Memory srcMem) {
final long pre0 = checkPreambleSize(srcMem); //make sure we can get the assumed preamble
final int preLongs = extractPreLongs(pre0); //byte 0
final int serVer = extractSerVer(pre0); //byte 1
final Family family = Family.idToFamily(extractFamilyID(pre0)); //byte 2
final int lgMaxMapSize = extractLgMaxMapSize(pre0); //byte 3
final int lgCurMapSize = extractLgCurMapSize(pre0); //byte 4
final int flags = extractFlags(pre0); //byte 5
final int type = extractSerDeId(pre0); //byte 6
final String flagsStr = zeroPad(Integer.toBinaryString(flags), 8) + ", " + (flags);
final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
final int maxMapSize = 1 << lgMaxMapSize;
final int curMapSize = 1 << lgCurMapSize;
final int maxPreLongs = Family.FREQUENCY.getMaxPreLongs();
//Assumed if preLongs == 1
int activeItems = 0;
long streamLength = 0;
long offset = 0;
//Assumed if preLongs == maxPreLongs
if (preLongs == maxPreLongs) {
//get full preamble
final long[] preArr = new long[preLongs];
srcMem.getLongArray(0, preArr, 0, preLongs);
activeItems = extractActiveItems(preArr[1]);
streamLength = preArr[2];
offset = preArr[3];
}
final StringBuilder sb = new StringBuilder();
sb.append(LS)
.append("### FREQUENCY SKETCH PREAMBLE SUMMARY:").append(LS)
.append("Byte 0: Preamble Longs : ").append(preLongs).append(LS)
.append("Byte 1: Serialization Version: ").append(serVer).append(LS)
.append("Byte 2: Family : ").append(family.toString()).append(LS)
.append("Byte 3: MaxMapSize : ").append(maxMapSize).append(LS)
.append("Byte 4: CurMapSize : ").append(curMapSize).append(LS)
.append("Byte 5: Flags Field : ").append(flagsStr).append(LS)
.append(" EMPTY : ").append(empty).append(LS)
.append("Byte 6: Freq Sketch Type : ").append(type).append(LS);
if (preLongs == 1) {
sb.append(" --ABSENT, ASSUMED:").append(LS);
} else { //preLongs == maxPreLongs
sb.append("Bytes 8-11 : ActiveItems : ").append(activeItems).append(LS);
sb.append("Bytes 16-23: StreamLength : ").append(streamLength).append(LS)
.append("Bytes 24-31: Offset : ").append(offset).append(LS);
}
sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS);
sb.append( "TOTAL Sketch Bytes : ").append((preLongs + (activeItems * 2)) << 3)
.append(LS)
.append("### END FREQUENCY SKETCH PREAMBLE SUMMARY").append(LS);
return sb.toString();
}
// @formatter:on
static int extractPreLongs(final long pre0) { //Byte 0
final long mask = 0X3FL; //Lower 6 bits
return (int) (pre0 & mask);
}
static int extractSerVer(final long pre0) { //Byte 1
final int shift = SER_VER_BYTE << 3;
final long mask = 0XFFL;
return (int) ((pre0 >>> shift) & mask);
}
static int extractFamilyID(final long pre0) { //Byte 2
final int shift = FAMILY_BYTE << 3;
final long mask = 0XFFL;
return (int) ((pre0 >>> shift) & mask);
}
static int extractLgMaxMapSize(final long pre0) { //Byte 3
final int shift = LG_MAX_MAP_SIZE_BYTE << 3;
final long mask = 0XFFL;
return (int) ((pre0 >>> shift) & mask);
}
static int extractLgCurMapSize(final long pre0) { //Byte 4
final int shift = LG_CUR_MAP_SIZE_BYTE << 3;
final long mask = 0XFFL;
return (int) ((pre0 >>> shift) & mask);
}
static int extractFlags(final long pre0) { //Byte 5
final int shift = FLAGS_BYTE << 3;
final long mask = 0XFFL;
return (int) ((pre0 >>> shift) & mask);
}
static short extractSerDeId(final long pre0) { //Byte 6,7
final int shift = SER_DE_ID_SHORT << 3;
final long mask = 0XFFFFL;
return (short) ((pre0 >>> shift) & mask);
}
static int extractActiveItems(final long pre1) { //Bytes 8 to 11
final long mask = 0XFFFFFFFFL;
return (int) (pre1 & mask) ;
}
static long insertPreLongs(final int preLongs, final long pre0) { //Byte 0
final long mask = 0X3FL; //Lower 6 bits
return (preLongs & mask) | (~mask & pre0);
}
static long insertSerVer(final int serVer, final long pre0) { //Byte 1
final int shift = SER_VER_BYTE << 3;
final long mask = 0XFFL;
return ((serVer & mask) << shift) | (~(mask << shift) & pre0);
}
static long insertFamilyID(final int familyID, final long pre0) { //Byte 2
final int shift = FAMILY_BYTE << 3;
final long mask = 0XFFL;
return ((familyID & mask) << shift) | (~(mask << shift) & pre0);
}
static long insertLgMaxMapSize(final int lgMaxMapSize, final long pre0) { //Byte 3
final int shift = LG_MAX_MAP_SIZE_BYTE << 3;
final long mask = 0XFFL;
return ((lgMaxMapSize & mask) << shift) | (~(mask << shift) & pre0);
}
static long insertLgCurMapSize(final int lgCurMapSize, final long pre0) { //Byte 4
final int shift = LG_CUR_MAP_SIZE_BYTE << 3;
final long mask = 0XFFL;
return ((lgCurMapSize & mask) << shift) | (~(mask << shift) & pre0);
}
static long insertFlags(final int flags, final long pre0) { //Byte 5
final int shift = FLAGS_BYTE << 3;
final long mask = 0XFFL;
return ((flags & mask) << shift) | (~(mask << shift) & pre0);
}
static long insertSerDeId(final short serDeId, final long pre0) { //Byte 6,7
final int shift = SER_DE_ID_SHORT << 3;
final long mask = 0XFFFFL;
return ((serDeId & mask) << shift) | (~(mask << shift) & pre0);
}
static long insertActiveItems(final int activeItems, final long pre1) { //Bytes 8 to 11
final long mask = 0XFFFFFFFFL;
return (activeItems & mask) | (~mask & pre1);
}
/**
* Checks Memory for capacity to hold the preamble and returns the first 8 bytes.
* @param mem the given Memory
* @return the first 8 bytes of preamble as a long.
*/
static long checkPreambleSize(final Memory mem) {
final long cap = mem.getCapacity();
if (cap < 8) { throwNotBigEnough(cap, 8); }
final long pre0 = mem.getLong(0);
final int preLongs = (int) (pre0 & 0X3FL); //lower 6 bits
final int required = Math.max(preLongs << 3, 8);
if (cap < required) { throwNotBigEnough(cap, required); }
return pre0;
}
private static void throwNotBigEnough(final long cap, final int required) {
throw new SketchesArgumentException(
"Possible Corruption: "
+ "Size of byte array or Memory not large enough for Preamble: Size: " + cap
+ ", Required: " + required);
}
}