| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.datasketches.sampling; |
| |
| import static org.testng.Assert.assertEquals; |
| import static org.testng.Assert.assertFalse; |
| import static org.testng.Assert.assertNull; |
| import static org.testng.Assert.assertTrue; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Random; |
| |
| import org.testng.annotations.Test; |
| |
| public class EbppsSampleTest { |
| private static final double EPS = 1e-14; |
| |
| @Test |
| public void basicInitialization() { |
| EbppsItemsSample<Integer> sample = new EbppsItemsSample<>(0); |
| assertEquals(sample.getC(), 0.0); |
| assertEquals(sample.getNumRetainedItems(), 0); |
| assertNull(sample.getSample()); |
| } |
| |
| @Test |
| public void initializeWithData() { |
| final double theta1 = 1.0; |
| EbppsItemsSample<Integer> sample = new EbppsItemsSample<>(1); |
| sample.replaceContent(-1, theta1); |
| assertEquals(sample.getC(), theta1); |
| assertEquals(sample.getNumRetainedItems(), 1); |
| assertEquals(sample.getSample().size(), 1); |
| assertEquals(sample.getSample().get(0), -1); |
| assertFalse(sample.hasPartialItem()); |
| |
| final double theta2 = 1e-300; |
| sample.replaceContent(-2, theta2); |
| assertEquals(sample.getC(), theta2); |
| assertEquals(sample.getNumRetainedItems(), 1); |
| // next check assumes random number is > 1e-300 |
| assertNull(sample.getSample()); |
| assertTrue(sample.hasPartialItem()); |
| } |
| |
| @Test |
| public void downsampleToZeroOrOneItem() { |
| EbppsItemsSample<String> sample = new EbppsItemsSample<>(1); |
| sample.replaceContent("a", 1.0); |
| |
| sample.downsample(2.0); // no-op |
| assertEquals(sample.getC(), 1.0); |
| assertEquals(sample.getNumRetainedItems(), 1); |
| assertEquals(sample.getSample().get(0), "a"); |
| assertFalse(sample.hasPartialItem()); |
| |
| // downsample and result in an empty sample |
| ArrayList<String> items = new ArrayList<>(Arrays.asList("a", "b")); |
| sample = new EbppsItemsSample<>(items, null, 1.8); |
| sample.replaceRandom(new Random(85942)); |
| sample.downsample(0.5); |
| assertEquals(sample.getC(), 0.9); |
| assertEquals(sample.getNumRetainedItems(), 0); |
| assertNull(sample.getSample()); |
| assertFalse(sample.hasPartialItem()); |
| |
| // downsample and result in a sample with a partial item |
| // create a new ArrayList each time to be sure it's clean |
| items = new ArrayList<>(Arrays.asList("a", "b")); |
| sample = new EbppsItemsSample<>(items, null, 1.5); |
| sample.replaceRandom(new Random(15)); |
| sample.downsample(0.5); |
| assertEquals(sample.getC(), 0.75); |
| assertEquals(sample.getNumRetainedItems(), 1); |
| assertTrue(sample.hasPartialItem()); |
| for (String s : sample.getSample()) { |
| assertTrue("a".equals(s) || "b".equals(s)); |
| } |
| } |
| |
| @Test |
| public void downsampleMultipleItems() { |
| // downsample to an exact integer c (7.5 * 0.8 = 6.0) |
| ArrayList<String> items = new ArrayList<>(Arrays.asList("a", "b", "c", "d", "e", "f", "g")); |
| String partial = "h"; |
| ArrayList<String> referenceItems = new ArrayList<>(items); // copy of inputs |
| referenceItems.add("h"); // include the partial item |
| |
| EbppsItemsSample<String> sample = new EbppsItemsSample<>(items, partial, 7.5); |
| sample.downsample(0.8); |
| assertEquals(sample.getC(), 6.0); |
| assertEquals(sample.getNumRetainedItems(), 6); |
| assertFalse(sample.hasPartialItem()); |
| for (String s : sample.getSample()) { |
| assertTrue(referenceItems.contains(s)); |
| } |
| |
| // downsample to c > 1 with partial item |
| items = new ArrayList<>(referenceItems); // includes previous optional |
| partial = "i"; |
| referenceItems.add("i"); |
| sample = new EbppsItemsSample<>(items, partial, 8.5); |
| sample.downsample(0.8); |
| assertEquals(sample.getC(), 6.8, EPS); |
| assertEquals(sample.getNumRetainedItems(), 7); |
| assertTrue(sample.hasPartialItem()); |
| for (String s : sample.getSample()) { |
| assertTrue(referenceItems.contains(s)); |
| } |
| } |
| |
| @Test |
| public void mergeUnitSamples() { |
| int k = 8; |
| EbppsItemsSample<Integer> sample = new EbppsItemsSample<>(k); |
| EbppsItemsSample<Integer> s = new EbppsItemsSample<>(1); |
| |
| for (int i = 1; i <= k; ++i) { |
| s.replaceContent(i, 1.0); |
| sample.merge(s); |
| assertEquals(sample.getC(), (double) i); |
| assertEquals(sample.getNumRetainedItems(), i); |
| } |
| |
| sample.reset(); |
| assertEquals(sample.getC(), 0.0); |
| assertEquals(sample.getNumRetainedItems(), 0); |
| assertFalse(sample.hasPartialItem()); |
| } |
| } |