blob: a8ab57f0ff47c8e84477cafdccbc1c32f327dfa0 [file] [log] [blame]
/*
* Copyright 2015, Yahoo! Inc.
* Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
*/
package com.yahoo.sketches.theta;
import static com.yahoo.sketches.theta.Sketch.getMaxUpdateSketchBytes;
import static com.yahoo.sketches.theta.SetOperation.*;
import static com.yahoo.sketches.Family.A_NOT_B;
import static com.yahoo.sketches.Family.INTERSECTION;
import static com.yahoo.sketches.Family.UNION;
import static com.yahoo.sketches.ResizeFactor.X4;
import static org.testng.Assert.assertEquals;
//import static org.testng.Assert.assertTrue;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import java.nio.ByteBuffer;
import org.testng.annotations.Test;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.theta.Sketch;
import com.yahoo.sketches.memory.Memory;
import com.yahoo.sketches.memory.MemoryRegion;
import com.yahoo.sketches.memory.NativeMemory;
/**
* @author Lee Rhodes
*/
public class SetOperationTest {
@Test
public void checkBuilder() {
int k = 2048;
long seed = 1021;
UpdateSketch usk1 = UpdateSketch.builder().setSeed(seed).build(k);
UpdateSketch usk2 = UpdateSketch.builder().setSeed(seed).build(k);
for (int i=0; i<k/2; i++) usk1.update(i); //256
for (int i=k/2; i<k; i++) usk2.update(i); //256 no overlap
ResizeFactor rf = X4;
//use default size
Union union = SetOperation.builder().setSeed(seed).setResizeFactor(rf).buildUnion();
union.update(usk1);
union.update(usk2);
double exactUnionAnswer = k;
CompactSketch comp1 = union.getResult(false, null); //ordered: false
double compEst = comp1.getEstimate();
assertEquals(compEst, exactUnionAnswer, 0.0);
}
@Test
public void checkBuilder2() {
SetOperationBuilder bldr = SetOperation.builder();
long seed = 12345L;
bldr.setSeed(seed);
assertEquals(seed, bldr.getSeed());
float p = (float)0.5;
bldr.setP(p);
assertEquals(p, bldr.getP());
ResizeFactor rf = ResizeFactor.X4;
bldr.setResizeFactor(rf);
assertEquals(rf, bldr.getResizeFactor());
Memory mem = new NativeMemory(new byte[16]);
bldr.initMemory(mem);
assertEquals(mem, bldr.getMemory());
int lgK = 10;
int k = 1 << lgK;
bldr.setNominalEntries(k);
assertEquals(lgK, bldr.getLgNominalEntries());
println(bldr.toString());
}
@Test
public void checkBuilderNonPowerOf2() {
SetOperation.builder().buildUnion(1000);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkBuilderBadFamily() {
SetOperation.builder().build(Family.ALPHA);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkBuilderIllegalPhi() {
float p = (float)1.5;
SetOperation.builder().setP(p).buildUnion();
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkBuilderIllegalPlo() {
float p = 0;
SetOperation.builder().setP(p).buildUnion();
}
@Test
public void checkBuilderValidP() {
float p = (float).5;
SetOperation.builder().setP(p).buildUnion();
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkBuilderAnotB_noMem() {
Memory mem = new NativeMemory(new byte[64]);
SetOperation.builder().initMemory(mem).buildANotB();
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkBuilderBadSeedHashes() {
int k = 2048;
long seed = 1021;
UpdateSketch usk1 = UpdateSketch.builder().setSeed(seed).build(k);
UpdateSketch usk2 = UpdateSketch.builder().build(k);
for (int i=0; i<k/2; i++) usk1.update(i); //256
for (int i=k/2; i<k; i++) usk2.update(i); //256 no overlap
ResizeFactor rf = X4;
Union union = SetOperation.builder().setSeed(seed).setResizeFactor(rf).buildUnion(k);
union.update(usk1);
union.update(usk2); //throws seed exception here
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkIllegalSetOpHeapify() {
int k = 64;
UpdateSketch usk1 = UpdateSketch.builder().build(k);
for (int i=0; i<k; i++) usk1.update(i); //64
byte[] byteArray = usk1.toByteArray();
Memory mem = new NativeMemory(byteArray);
SetOperation.heapify(mem);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void checkIllegalSetOpWrap() {
int k = 64;
UpdateSketch usk1 = UpdateSketch.builder().build(k);
for (int i=0; i<k; i++) usk1.update(i); //64
byte[] byteArray = usk1.toByteArray();
Memory mem = new NativeMemory(byteArray);
SetOperation.wrap(mem);
}
@Test
public void checkBuildSetOps() {
SetOperationBuilder bldr = Sketches.setOperationBuilder();
bldr.buildUnion();
bldr.buildIntersection();
bldr.buildANotB();
}
@Test
public void checkComputeLgArrLongs() {
assertEquals(computeMinLgArrLongsFromCount(30), 5);
assertEquals(computeMinLgArrLongsFromCount(31), 6);
}
/**
* The objective is to union 3 16K sketches into a union SetOperation and get the result.
* All operations are to be performed within a single direct ByteBuffer as the backing store.
* First we will make the union size large enough so that its answer will be exact (with this
* specific example).
* <p> Next, we recover the Union SetOp and the 3 sketches and the space for the result. Then
* recompute the union using a Union of the same size as the input sketches, where the end result
* will be an estimate.
*/
@Test
public void checkDirectUnionExample() {
//The first task is to compute how much direct memory we need and set the heap large enough.
//For the first trial, we will set the Union large enough for an exact result for THIS example.
int sketchNomEntries = 1 << 14; //16K
int unionNomEntries = 1 << 15; //32K
int[] heapLayout = getHeapLayout(sketchNomEntries, unionNomEntries);
//This BB belongs to you and you always retain a link to it until you are completely
// done and then let java garbage collect it.
//I use a heap backing array, because for this example it is easier to peak into it and
// see what is going on.
byte[] backingArr = new byte[heapLayout[5]];
ByteBuffer heapBuf = ByteBuffer.wrap(backingArr);
// Attaches a NM object to the underlying memory of heapBuf.
// heapMem will have a Read/Write view of the complete backing memory of heapBuf (direct or not).
// Any R/W action from heapMem will be visible via heapBuf and visa versa.
//
// However, if you had created this NM object directly in raw, off-heap "native" memory
// you would have the responsibility to clear it, and free it to the OS when you
// are done. But, since it was allocated via BB, it does the clearing and freeing for you.
NativeMemory heapMem = new NativeMemory(heapBuf);
double result = directUnionTrial1(heapMem, heapLayout, sketchNomEntries, unionNomEntries);
println("1st est: "+result);
int expected = sketchNomEntries*2;
assertEquals(result, expected, 0.0); //est must be exact.
//For trial 2, we will use the same union space but use only part of it.
unionNomEntries = 1 << 14; //16K
result = directUnionTrial2(heapMem, heapLayout, sketchNomEntries, unionNomEntries);
//intentially loose bounds
assertEquals(result, expected, expected*0.05);
println("2nd est: "+result);
println("Error %: "+((result/expected -1.0)*100));
//We are done with this NativeMemory. It is a good habit to free it, even though,
// in this specific case, the BB backing memory is managed by the JVM, so it is
// not strictly necessary.
//Forgetting to do this could lead to segment faults, which are very difficult to troubleshoot.
heapMem.freeMemory();
}
/**
* Compute offsets for MyHeap for Union, sketch1, sketch2, sketch3, resultSketch, total layout.
* @return array of offsets for Union, sketch1, sketch2, sketch3, resultSketch, total layout
*/
private static int[] getHeapLayout(int sketchNomEntries, int unionNomEntries) {
int[] heapLayout = new int[6];
int unionBytes = SetOperation.getMaxUnionBytes(unionNomEntries);
int sketchBytes = getMaxUpdateSketchBytes(sketchNomEntries);
int resultBytes = Sketch.getMaxCompactSketchBytes(unionNomEntries);
heapLayout[0] = 0; //offset for Union
heapLayout[1] = unionBytes; //offset for sketch1
heapLayout[2] = unionBytes + sketchBytes; //offset for sketch2
heapLayout[3] = unionBytes + 2*sketchBytes; //offset for sketch3
heapLayout[4] = unionBytes + 3*sketchBytes; //offset for result
heapLayout[5] = unionBytes + 3*sketchBytes + resultBytes; //total
return heapLayout;
}
private static double directUnionTrial1(
Memory heapMem, int[] heapLayout, int sketchNomEntries, int unionNomEntries) {
int offset = heapLayout[0];
int bytes = heapLayout[1] - offset;
Memory unionMem = new MemoryRegion(heapMem, offset, bytes);
Union union = SetOperation.builder().initMemory(unionMem).buildUnion(unionNomEntries);
Memory sketch1mem = new MemoryRegion(heapMem, heapLayout[1], heapLayout[2]-heapLayout[1]);
Memory sketch2mem = new MemoryRegion(heapMem, heapLayout[2], heapLayout[3]-heapLayout[2]);
Memory sketch3mem = new MemoryRegion(heapMem, heapLayout[3], heapLayout[4]-heapLayout[3]);
Memory resultMem = new MemoryRegion(heapMem, heapLayout[4], heapLayout[5]-heapLayout[4]);
//Initialize the 3 sketches
UpdateSketch sk1 = UpdateSketch.builder().initMemory(sketch1mem).build(sketchNomEntries);
UpdateSketch sk2 = UpdateSketch.builder().initMemory(sketch2mem).build(sketchNomEntries);
UpdateSketch sk3 = UpdateSketch.builder().initMemory(sketch3mem).build(sketchNomEntries);
//This little trial has sk1 and sk2 distinct and sk2 overlap both.
//Build the sketches.
for (int i=0; i< sketchNomEntries; i++) {
sk1.update(i);
sk2.update(i + sketchNomEntries/2);
sk3.update(i + sketchNomEntries);
}
//confirm that each of these 3 sketches is exact.
assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0);
assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0);
assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0);
//Let's union the first 2 sketches
union.update(sk1);
union.update(sk2);
//Let's recover the union and the 3rd sketch
union = Sketches.wrapUnion(unionMem);
sk3 = (UpdateSketch) Sketch.wrap(sketch3mem);
union.update(sk3);
Sketch resSk = union.getResult(true, resultMem);
double est = resSk.getEstimate();
return est;
}
private static double directUnionTrial2(
Memory heapMem, int[] heapLayout, int sketchNomEntries, int unionNomEntries) {
Memory unionMem = new MemoryRegion(heapMem, heapLayout[0], heapLayout[1]-heapLayout[0]);
Memory sketch1mem = new MemoryRegion(heapMem, heapLayout[1], heapLayout[2]-heapLayout[1]);
Memory sketch2mem = new MemoryRegion(heapMem, heapLayout[2], heapLayout[3]-heapLayout[2]);
Memory sketch3mem = new MemoryRegion(heapMem, heapLayout[3], heapLayout[4]-heapLayout[3]);
Memory resultMem = new MemoryRegion(heapMem, heapLayout[4], heapLayout[5]-heapLayout[4]);
//Recover the 3 sketches
UpdateSketch sk1 = (UpdateSketch) Sketch.wrap(sketch1mem);
UpdateSketch sk2 = (UpdateSketch) Sketch.wrap(sketch2mem);
UpdateSketch sk3 = (UpdateSketch) Sketch.wrap(sketch3mem);
//confirm that each of these 3 sketches is exact.
assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0);
assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0);
assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0);
//Create a new union in the same space with a smaller size.
unionMem.clear();
Union union = SetOperation.builder().initMemory(unionMem).buildUnion(unionNomEntries);
union.update(sk1);
union.update(sk2);
union.update(sk3);
Sketch resSk = union.getResult(true, resultMem);
double est = resSk.getEstimate();
return est;
}
@Test
public void checkValidSetOpID() {
assertFalse(SetOperation.isValidSetOpID(0));
assertTrue(SetOperation.isValidSetOpID(UNION.getID()));
assertTrue(SetOperation.isValidSetOpID(INTERSECTION.getID()));
assertTrue(SetOperation.isValidSetOpID(A_NOT_B.getID()));
}
@Test
public void setOpsExample() {
println("Set Operations Example:");
int k = 4096;
UpdateSketch skA = Sketches.updateSketchBuilder().build(k);
UpdateSketch skB = Sketches.updateSketchBuilder().build(k);
UpdateSketch skC = Sketches.updateSketchBuilder().build(k);
for (int i=1; i<=10; i++) { skA.update(i); }
for (int i=1; i<=20; i++) { skB.update(i); }
for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set
Union union = Sketches.setOperationBuilder().buildUnion(k);
union.update(skA);
union.update(skB);
// ... continue to iterate on the input sketches to union
CompactSketch unionSk = union.getResult(); //the result union sketch
println("A U B : "+unionSk.getEstimate()); //the estimate of the union
//Intersection is similar
Intersection inter = Sketches.setOperationBuilder().buildIntersection();
inter.update(unionSk);
inter.update(skC);
// ... continue to iterate on the input sketches to intersect
CompactSketch interSk = inter.getResult(); //the result intersection sketch
println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection
//The AnotB operation is a little different as it is stateless:
AnotB aNotB = Sketches.setOperationBuilder().buildANotB();
aNotB.update(skA, skC);
CompactSketch not = aNotB.getResult();
println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation
}
@Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
/**
* @param s value to print
*/
static void println(String s) {
//System.out.println(s); //disable here
}
}