blob: e00c216aec6fb6d2a1ba0ecaadc77f44f477868e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.sampling;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import org.testng.annotations.Test;
public class EbppsSampleTest {
private static final double EPS = 1e-14;
@Test
public void basicInitialization() {
EbppsItemsSample<Integer> sample = new EbppsItemsSample<>(0);
assertEquals(sample.getC(), 0.0);
assertEquals(sample.getNumRetainedItems(), 0);
assertNull(sample.getSample());
}
@Test
public void initializeWithData() {
final double theta1 = 1.0;
EbppsItemsSample<Integer> sample = new EbppsItemsSample<>(1);
sample.replaceContent(-1, theta1);
assertEquals(sample.getC(), theta1);
assertEquals(sample.getNumRetainedItems(), 1);
assertEquals(sample.getSample().size(), 1);
assertEquals(sample.getSample().get(0), -1);
assertFalse(sample.hasPartialItem());
final double theta2 = 1e-300;
sample.replaceContent(-2, theta2);
assertEquals(sample.getC(), theta2);
assertEquals(sample.getNumRetainedItems(), 1);
// next check assumes random number is > 1e-300
assertNull(sample.getSample());
assertTrue(sample.hasPartialItem());
}
@Test
public void downsampleToZeroOrOneItem() {
EbppsItemsSample<String> sample = new EbppsItemsSample<>(1);
sample.replaceContent("a", 1.0);
sample.downsample(2.0); // no-op
assertEquals(sample.getC(), 1.0);
assertEquals(sample.getNumRetainedItems(), 1);
assertEquals(sample.getSample().get(0), "a");
assertFalse(sample.hasPartialItem());
// downsample and result in an empty sample
ArrayList<String> items = new ArrayList<>(Arrays.asList("a", "b"));
sample = new EbppsItemsSample<>(items, null, 1.8);
sample.replaceRandom(new Random(85942));
sample.downsample(0.5);
assertEquals(sample.getC(), 0.9);
assertEquals(sample.getNumRetainedItems(), 0);
assertNull(sample.getSample());
assertFalse(sample.hasPartialItem());
// downsample and result in a sample with a partial item
// create a new ArrayList each time to be sure it's clean
items = new ArrayList<>(Arrays.asList("a", "b"));
sample = new EbppsItemsSample<>(items, null, 1.5);
sample.replaceRandom(new Random(15));
sample.downsample(0.5);
assertEquals(sample.getC(), 0.75);
assertEquals(sample.getNumRetainedItems(), 1);
assertTrue(sample.hasPartialItem());
for (String s : sample.getSample()) {
assertTrue("a".equals(s) || "b".equals(s));
}
}
@Test
public void downsampleMultipleItems() {
// downsample to an exact integer c (7.5 * 0.8 = 6.0)
ArrayList<String> items = new ArrayList<>(Arrays.asList("a", "b", "c", "d", "e", "f", "g"));
String partial = "h";
ArrayList<String> referenceItems = new ArrayList<>(items); // copy of inputs
referenceItems.add("h"); // include the partial item
EbppsItemsSample<String> sample = new EbppsItemsSample<>(items, partial, 7.5);
sample.downsample(0.8);
assertEquals(sample.getC(), 6.0);
assertEquals(sample.getNumRetainedItems(), 6);
assertFalse(sample.hasPartialItem());
for (String s : sample.getSample()) {
assertTrue(referenceItems.contains(s));
}
// downsample to c > 1 with partial item
items = new ArrayList<>(referenceItems); // includes previous optional
partial = "i";
referenceItems.add("i");
sample = new EbppsItemsSample<>(items, partial, 8.5);
sample.downsample(0.8);
assertEquals(sample.getC(), 6.8, EPS);
assertEquals(sample.getNumRetainedItems(), 7);
assertTrue(sample.hasPartialItem());
for (String s : sample.getSample()) {
assertTrue(referenceItems.contains(s));
}
}
@Test
public void mergeUnitSamples() {
int k = 8;
EbppsItemsSample<Integer> sample = new EbppsItemsSample<>(k);
EbppsItemsSample<Integer> s = new EbppsItemsSample<>(1);
for (int i = 1; i <= k; ++i) {
s.replaceContent(i, 1.0);
sample.merge(s);
assertEquals(sample.getC(), (double) i);
assertEquals(sample.getNumRetainedItems(), i);
}
sample.reset();
assertEquals(sample.getC(), 0.0);
assertEquals(sample.getNumRetainedItems(), 0);
assertFalse(sample.hasPartialItem());
}
}