blob: 56a6929af2f1187a30c6eaba068a2d43c8bdbb40 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.testng.annotations.Test;
/**
* Empty essentially means that the sketch has never seen data.
*
* @author Lee Rhodes
*/
@SuppressWarnings("javadoc")
public class EmptyTest {
@Test
public void checkEmpty() {
final UpdateSketch sk1 = Sketches.updateSketchBuilder().build();
final UpdateSketch sk2 = Sketches.updateSketchBuilder().build();
final Intersection inter = Sketches.setOperationBuilder().buildIntersection();
final int u = 100;
for (int i = 0; i < u; i++) { //disjoint
sk1.update(i);
sk2.update(i + u);
}
inter.intersect(sk1);
inter.intersect(sk2);
final CompactSketch csk = inter.getResult();
//The intersection of two disjoint, exact-mode sketches is empty, T == 1.0.
println(csk.toString());
assertTrue(csk.isEmpty());
final AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
final CompactSketch csk2 = aNotB.aNotB(csk, sk1);
//The AnotB of an empty, T == 1.0 sketch with another exact-mode sketch is empty, T == 1.0
assertTrue(csk2.isEmpty());
}
@Test
public void checkNotEmpty() {
final UpdateSketch sk1 = Sketches.updateSketchBuilder().build();
final UpdateSketch sk2 = Sketches.updateSketchBuilder().build();
final Intersection inter = Sketches.setOperationBuilder().buildIntersection();
final int u = 10000; //estimating
for (int i = 0; i < u; i++) { //disjoint
sk1.update(i);
sk2.update(i + u);
}
inter.intersect(sk1);
inter.intersect(sk2);
final CompactSketch csk = inter.getResult();
println(csk.toString());
//The intersection of two disjoint, est-mode sketches is not-empty, T < 1.0.
assertFalse(csk.isEmpty());
AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
final CompactSketch csk2 = aNotB.aNotB(csk, sk1); //empty, T < 1.0; with est-mode sketch
println(csk2.toString());
//The AnotB of an empty, T < 1.0 sketch with another exact-mode sketch is not-empty.
assertFalse(csk2.isEmpty());
final UpdateSketch sk3 = Sketches.updateSketchBuilder().build();
aNotB = Sketches.setOperationBuilder().buildANotB();
final CompactSketch csk3 = aNotB.aNotB(sk3, sk1); //empty, T == 1.0; with est-mode sketch
println(csk3.toString());
//the AnotB of an empty, T == 1.0 sketch with another est-mode sketch is empty, T < 1.0
assertTrue(csk3.isEmpty());
}
@Test
public void checkPsampling() {
final UpdateSketch sk1 = Sketches.updateSketchBuilder().setP(.5F).build();
assertTrue(sk1.isEmpty());
//An empty P-sampling sketch where T < 1.0 and has never seen data is also empty
// and will have a full preamble of 24 bytes. But when compacted, theta returns to 1.0, so
// it will be stored as only 8 bytes.
assertEquals(sk1.compact().toByteArray().length, 8);
}
//These 3 tests reproduce a failure mode where an "old" empty sketch of 8 bytes without
// its empty-flag bit set is read.
@Test
public void checkBackwardCompatibility1() {
final int k = 16;
final int bytes = Sketches.getMaxUnionBytes(k); //288
final Union union = SetOperation.builder().buildUnion(WritableMemory.allocate(bytes));
final Memory mem = badEmptySk();
final Sketch wsk = Sketches.wrapSketch(mem);
union.union(wsk); //union has memory
}
@Test
public void checkBackwardCompatibility2() {
final Union union = SetOperation.builder().setNominalEntries(16).buildUnion();
final Memory mem = badEmptySk();
final Sketch wsk = Sketches.wrapSketch(mem);
union.union(wsk); //heap union
}
@Test
public void checkBackwardCompatibility3() {
final Memory mem = badEmptySk();
Sketches.heapifySketch(mem);
}
@Test
public void checkEmptyToCompact() {
final UpdateSketch sk1 = Sketches.updateSketchBuilder().build();
final CompactSketch csk = sk1.compact();
assertTrue(csk instanceof EmptyCompactSketch);
final CompactSketch csk2 = csk.compact();
assertTrue(csk2 instanceof EmptyCompactSketch);
final CompactSketch csk3 = csk.compact(true, WritableMemory.allocate(8));
assertTrue(csk3 instanceof DirectCompactSketch);
assertEquals(csk2.getCurrentPreambleLongs(), 1);
}
//SerVer 2 had an empty sketch where preLongs = 1, but empty bit was not set.
private static Memory badEmptySk() {
final long preLongs = 1;
final long serVer = 2;
final long family = 3; //compact
final long flags = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK;
final long seedHash = 0x93CC;
final long badEmptySk = seedHash << 48 | flags << 40
| family << 16 | serVer << 8 | preLongs;
final WritableMemory wmem = WritableMemory.allocate(8);
wmem.putLong(0, badEmptySk);
return wmem;
}
/**
* @param s value to print
*/
static void println(final String s) {
//System.out.println(s); //disable here
}
}