| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.datasketches.quantiles; |
| |
| import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; |
| import static org.apache.datasketches.common.TestUtil.CHECK_CPP_HISTORICAL_FILES; |
| import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; |
| import static org.apache.datasketches.common.TestUtil.cppPath; |
| import static org.apache.datasketches.common.TestUtil.javaPath; |
| import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; |
| import static org.testng.Assert.assertEquals; |
| import static org.testng.Assert.assertTrue; |
| |
| import java.io.IOException; |
| import java.nio.file.Files; |
| import java.util.Comparator; |
| |
| import org.apache.datasketches.common.ArrayOfStringsSerDe; |
| import org.apache.datasketches.common.TestUtil; |
| import org.apache.datasketches.memory.Memory; |
| import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; |
| import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; |
| import org.testng.Assert; |
| import org.testng.annotations.Test; |
| |
| /** |
| * Serialize binary sketches to be tested by C++ code. |
| * Test deserialization of binary sketches serialized by C++ code. |
| */ |
| public class QuantilesSketchCrossLanguageTest { |
| private static final String LS = System.getProperty("line.separator"); |
| |
| @Test(groups = {GENERATE_JAVA_FILES}) |
| public void generateDoublesSketch() throws IOException { |
| final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; |
| for (final int n: nArr) { |
| final UpdateDoublesSketch sk = DoublesSketch.builder().build(); |
| for (int i = 1; i <= n; i++) sk.update(i); |
| Files.newOutputStream(javaPath.resolve("quantiles_double_n" + n + "_java.sk")).write(sk.toByteArray()); |
| } |
| } |
| |
| @Test(groups = {GENERATE_JAVA_FILES}) |
| public void generateItemsSketchWithStrings() throws IOException { |
| final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; |
| for (final int n: nArr) { |
| final ItemsSketch<String> sk = ItemsSketch.getInstance(String.class, new Comparator<String>() { |
| @Override |
| public int compare(final String s1, final String s2) { |
| try { |
| final int i1 = Integer.parseInt(s1); |
| final int i2 = Integer.parseInt(s2); |
| return Integer.compare(i1,i2); |
| } catch (NumberFormatException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| }); |
| for (int i = 1; i <= n; i++) sk.update(Integer.toString(i)); |
| if (n > 0) { |
| assertEquals(sk.getMinItem(), "1"); |
| assertEquals(sk.getMaxItem(), Integer.toString(n)); |
| } |
| Files.newOutputStream(javaPath.resolve("quantiles_string_n" + n + "_java.sk")) |
| .write(sk.toByteArray(new ArrayOfStringsSerDe())); |
| } |
| } |
| |
| @Test(groups = {CHECK_CPP_FILES}) |
| public void checkDoublesSketch() throws IOException { |
| final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; |
| for (int n: nArr) { |
| final byte[] byteArr = Files.readAllBytes(cppPath.resolve("quantiles_double_n" + n + "_cpp.sk")); |
| final DoublesSketch sk = DoublesSketch.wrap(Memory.wrap(byteArr)); |
| assertTrue(n == 0 ? sk.isEmpty() : !sk.isEmpty()); |
| assertTrue(n > 128 ? sk.isEstimationMode() : !sk.isEstimationMode()); |
| assertEquals(sk.getN(), n); |
| if (n > 0) { |
| assertEquals(sk.getMinItem(), 1); |
| assertEquals(sk.getMaxItem(), n); |
| QuantilesDoublesSketchIterator it = sk.iterator(); |
| long weight = 0; |
| while(it.next()) { |
| assertTrue(it.getQuantile() >= sk.getMinItem()); |
| assertTrue(it.getQuantile() <= sk.getMaxItem()); |
| weight += it.getWeight(); |
| } |
| assertEquals(weight, n); |
| } |
| } |
| } |
| |
| @Test(groups = {CHECK_CPP_FILES}) |
| public void checkItemsSketchWithStrings() throws IOException { |
| // sketch contains numbers in strings to make meaningful assertions |
| Comparator<String> numericOrder = new Comparator<String>() { |
| @Override |
| public int compare(final String s1, final String s2) { |
| try { |
| final int i1 = Integer.parseInt(s1); |
| final int i2 = Integer.parseInt(s2); |
| return Integer.compare(i1, i2); |
| } catch (NumberFormatException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| }; |
| final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; |
| for (int n: nArr) { |
| final byte[] byteArr = Files.readAllBytes(cppPath.resolve("quantiles_string_n" + n + "_cpp.sk")); |
| final ItemsSketch<String> sk = ItemsSketch.getInstance( |
| String.class, |
| Memory.wrap(byteArr), |
| numericOrder, |
| new ArrayOfStringsSerDe() |
| ); |
| assertTrue(n == 0 ? sk.isEmpty() : !sk.isEmpty()); |
| assertTrue(n > 128 ? sk.isEstimationMode() : !sk.isEstimationMode()); |
| assertEquals(sk.getN(), n); |
| if (n > 0) { |
| assertEquals(sk.getMinItem(), "1"); |
| assertEquals(sk.getMaxItem(), Integer.toString(n)); |
| QuantilesGenericSketchIterator<String> it = sk.iterator(); |
| long weight = 0; |
| while(it.next()) { |
| assertTrue(numericOrder.compare(it.getQuantile(), sk.getMinItem()) >= 0); |
| assertTrue(numericOrder.compare(it.getQuantile(), sk.getMaxItem()) <= 0); |
| weight += it.getWeight(); |
| } |
| assertEquals(weight, n); |
| } |
| } |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n50_v0.3.0.sk |
| //Median2: 26.0 |
| public void check030_50() { |
| int n = 50; |
| String ver = "0.3.0"; |
| double expected = 26; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n1000_v0.3.0.sk |
| //Median2: 501.0 |
| public void check030_1000() { |
| int n = 1000; |
| String ver = "0.3.0"; |
| double expected = 501; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n50_v0.6.0.sk |
| //Median2: 26.0 |
| public void check060_50() { |
| int n = 50; |
| String ver = "0.6.0"; |
| double expected = 26; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n1000_v0.6.0.sk |
| //Median2: 501.0 |
| public void check060_1000() { |
| int n = 1000; |
| String ver = "0.6.0"; |
| double expected = 501; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n50_v0.8.0.sk |
| //Median2: 26.0 |
| public void check080_50() { |
| int n = 50; |
| String ver = "0.8.0"; |
| double expected = 26; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n1000_v0.8.0.sk |
| //Median2: 501.0 |
| public void check080_1000() { |
| int n = 1000; |
| String ver = "0.8.0"; |
| double expected = 501; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n50_v0.8.3.sk |
| //Median2: 26.0 |
| public void check083_50() { |
| int n = 50; |
| String ver = "0.8.3"; |
| double expected = 26; |
| getAndCheck(ver, n, expected); |
| } |
| |
| @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) |
| //fullPath: sketches/src/test/resources/Qk128_n1000_v0.8.0.sk |
| //Median2: 501.0 |
| public void check083_1000() { |
| int n = 1000; |
| String ver = "0.8.3"; |
| double expected = 501; |
| getAndCheck(ver, n, expected); |
| } |
| |
| private static void getAndCheck(String ver, int n, double quantile) { |
| DoublesSketch.rand.setSeed(131); //make deterministic |
| //create fileName |
| int k = 128; |
| double nf = 0.5; |
| String fileName = String.format("Qk%d_n%d_v%s.sk", k, n, ver); |
| println("fullName: "+ fileName); |
| println("Old Median: " + quantile); |
| //Read File bytes |
| byte[] byteArr = TestUtil.getResourceBytes(fileName); |
| Memory srcMem = Memory.wrap(byteArr); |
| |
| // heapify as update sketch |
| DoublesSketch qs2 = UpdateDoublesSketch.heapify(srcMem); |
| //Test the quantile |
| double q2 = qs2.getQuantile(nf, EXCLUSIVE); |
| println("New Median: " + q2); |
| Assert.assertEquals(q2, quantile, 0.0); |
| |
| // same thing with compact sketch |
| qs2 = CompactDoublesSketch.heapify(srcMem); |
| //Test the quantile |
| q2 = qs2.getQuantile(nf, EXCLUSIVE); |
| println("New Median: " + q2); |
| Assert.assertEquals(q2, quantile, 0.0); |
| } |
| |
| @Test |
| public void printlnTest() { |
| println("PRINTING: "+this.getClass().getName()); |
| } |
| |
| static void println(final Object o) { |
| if (o == null) { print(LS); } |
| else { print(o.toString() + LS); } |
| } |
| |
| /** |
| * @param o value to print |
| */ |
| static void print(final Object o) { |
| if (o != null) { |
| //System.out.print(o.toString()); //disable here |
| } |
| } |
| |
| } |