| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.util.hll; |
| |
| import static com.carrotsearch.randomizedtesting.RandomizedTest.*; |
| import static org.apache.solr.util.hll.ProbabilisticTestUtil.*; |
| |
| import java.io.IOException; |
| import java.io.Writer; |
| import java.nio.charset.StandardCharsets; |
| import java.nio.file.Files; |
| import java.nio.file.Paths; |
| import java.util.Random; |
| |
| /** |
| * Generates test files for testing other implementations of HLL |
| * serialization/deserialization, namely the PostgreSQL implementation. |
| */ |
| public class IntegrationTestGenerator { |
| // ************************************************************************ |
| // directory to output the generated tests |
| private static final String OUTPUT_DIRECTORY = "/tmp/hll_test/"; |
| |
| // ------------------------------------------------------------------------ |
| // configurations for HLLs, should mirror settings in PostgreSQL impl. tests |
| private static final int REGWIDTH = 5; |
| private static final int LOG2M = 11; |
| // NOTE: This differs from the PostgreSQL impl. parameter 'expthresh'. This |
| // is a literal threshold to use in the promotion hierarchy, implying |
| // that both EXPLICIT representation should be used and it should |
| // NOT be automatically computed. This is done to ensure that the |
| // parameters of the test are very explicitly defined. |
| private static final int EXPLICIT_THRESHOLD = 256; |
| // NOTE: This is not the PostgreSQL impl. parameter 'sparseon'. 'sparseon' |
| // is assumed to be true and this is a literal register-count threshold |
| // to use in the promotion hierarchy. This is done to ensure that the |
| // parameters of the test are very explicitly defined. |
| private static final int SPARSE_THRESHOLD = 850; |
| |
| // ------------------------------------------------------------------------ |
| // computed constants |
| private static final int REGISTER_COUNT = (1 << LOG2M); |
| private static final int REGISTER_MAX_VALUE = (1 << REGWIDTH) - 1; |
| |
| // ======================================================================== |
| // Tests |
| /** |
| * Cumulatively adds random values to a FULL HLL through the small range |
| * correction, uncorrected range, and large range correction of the HLL's |
| * cardinality estimator. |
| * |
| * Format: cumulative add |
| * Tests: |
| * - FULL cardinality computation |
| */ |
| private static void fullCardinalityCorrectionTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "cardinality_correction", TestType.ADD); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.FULL); |
| initLineAdd(output, hll, schemaVersion); |
| |
| // run through some values in the small range correction |
| for(int i=0; i<((1 << LOG2M) - 1); i++) { |
| final long rawValue = constructHLLValue(LOG2M, i, 1); |
| cumulativeAddLine(output, hll, rawValue, schemaVersion); |
| } |
| |
| // run up past some values in the uncorrected range |
| for(int i=0; i<(1 << LOG2M); i++) { |
| final long rawValue = constructHLLValue(LOG2M, i, 7); |
| cumulativeAddLine(output, hll, rawValue, schemaVersion); |
| } |
| |
| // run through some values in the large range correction |
| for(int i=0; i<(1 << LOG2M); i++) { |
| final long rawValue = constructHLLValue(LOG2M, i, 30); |
| cumulativeAddLine(output, hll, rawValue, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Cumulatively adds random values to an EMPTY HLL. |
| * |
| * Format: cumulative add |
| * Tests: |
| * - EMPTY, EXPLICIT, SPARSE, PROBABILSTIC addition |
| * - EMPTY to EXPLICIT promotion |
| * - EXPLICIT to SPARSE promotion |
| * - SPARSE to FULL promotion |
| */ |
| private static void globalStepTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "comprehensive_promotion", TestType.ADD); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| initLineAdd(output, hll, schemaVersion); |
| |
| for(int i=0; i<10000/*arbitrary*/; i++) { |
| cumulativeAddLine(output, hll, randomLong(), schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Cumulatively unions "underpopulated" FULL HLLs into the |
| * accumulator to verify the correct behavior from the PostgreSQL implementation. |
| * The PostgreSQL implementation's representations of probabilistic HLLs should |
| * depend exclusively on the chosen SPARSE-to-FULL cutoff. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - EMPTY U "underpopulated" FULL => SPARSE |
| * - SPARSE U "underpopulated" FULL => SPARSE |
| * - SPARSE U "barely underpopulated" FULL => FULL |
| */ |
| private static void sparseFullRepresentationTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "sparse_full_representation", TestType.UNION); |
| |
| final HLL emptyHLL1 = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL2 = newHLL(HLLType.EMPTY); |
| |
| cumulativeUnionLine(output, emptyHLL1, emptyHLL2, schemaVersion); |
| |
| // NOTE: In this test the sparseReference will be the "expected" value |
| // from the C representation, since it doesn't choose representation |
| // based on original encoding, but rather on the promotion rules |
| // and the declared type of the "receiving" field. |
| // It is the manually-constructed union result. |
| |
| // "underpopulated" FULL U EMPTY => SPARSE |
| final HLL fullHLL = newHLL(HLLType.FULL); |
| fullHLL.addRaw(constructHLLValue(LOG2M, 0/*ix*/, 1/*val*/)); |
| |
| final HLL sparseHLL = newHLL(HLLType.SPARSE); |
| sparseHLL.addRaw(constructHLLValue(LOG2M, 0/*ix*/, 1/*val*/)); |
| |
| output.write(stringCardinality(fullHLL) + "," + toByteA(fullHLL, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n"); |
| output.flush(); |
| |
| // "underpopulated" FULL (small) U SPARSE (small) => SPARSE |
| final HLL fullHLL2 = newHLL(HLLType.FULL); |
| fullHLL2.addRaw(constructHLLValue(LOG2M, 1/*ix*/, 1/*val*/)); |
| |
| sparseHLL.addRaw(constructHLLValue(LOG2M, 1/*ix*/, 1/*val*/)); |
| |
| output.write(stringCardinality(fullHLL2) + "," + toByteA(fullHLL2, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n"); |
| output.flush(); |
| |
| // "underpopulated" FULL (just on edge) U SPARSE (small) => FULL |
| final HLL fullHLL3 = newHLL(HLLType.FULL); |
| for(int i=2; i<(SPARSE_THRESHOLD + 1); i++) { |
| fullHLL3.addRaw(constructHLLValue(LOG2M, i/*ix*/, 1/*val*/)); |
| sparseHLL.addRaw(constructHLLValue(LOG2M, i/*ix*/, 1/*val*/)); |
| } |
| |
| output.write(stringCardinality(fullHLL3) + "," + toByteA(fullHLL3, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n"); |
| output.flush(); |
| } |
| |
| /** |
| * Cumulatively sets successive registers to: |
| * |
| * <code>(registerIndex % REGISTER_MAX_VALUE) + 1</code> |
| * |
| * by adding specifically constructed values to a SPARSE HLL. |
| * Does not induce promotion. |
| * |
| * Format: cumulative add |
| * Tests: |
| * - SPARSE addition (predictable) |
| */ |
| private static void sparseStepTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "sparse_step", TestType.ADD); |
| |
| // the accumulator, starts empty sparse probabilistic |
| final HLL hll = newHLL(HLLType.SPARSE); |
| initLineAdd(output, hll, schemaVersion); |
| |
| for(int i=0; i<SPARSE_THRESHOLD; i++) { |
| final long rawValue = constructHLLValue(LOG2M, i, ((i % REGISTER_MAX_VALUE) + 1)); |
| cumulativeAddLine(output, hll, rawValue, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Cumulatively sets random registers of a SPARSE HLL to |
| * random values by adding random values. Does not induce promotion. |
| * |
| * Format: cumulative add |
| * Tests: |
| * - SPARSE addition (random) |
| */ |
| private static void sparseRandomTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "sparse_random", TestType.ADD); |
| |
| final Random random = new Random(randomLong()); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.SPARSE); |
| initLineAdd(output, hll, schemaVersion); |
| |
| for(int i=0; i<SPARSE_THRESHOLD; i++) { |
| final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT; |
| final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1); |
| final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue); |
| |
| cumulativeAddLine(output, hll, rawValue, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Cumulatively sets the first register (index 0) to value 2, the last |
| * register (index m-1) to value 2, and then sets registers with indices in |
| * the range 2 to (sparseCutoff + 2) to value 1 to trigger promotion. |
| * |
| * This tests for register alignment in the promotion from SPARSE |
| * to FULL. |
| * |
| * Format: cumulative add |
| * Tests: |
| * - SPARSE addition |
| * - SPARSE to FULL promotion |
| */ |
| private static void sparseEdgeTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "sparse_edge", TestType.ADD); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.SPARSE); |
| initLineAdd(output, hll, schemaVersion); |
| |
| final long firstValue = constructHLLValue(LOG2M, 0, 2); |
| cumulativeAddLine(output, hll, firstValue, schemaVersion); |
| |
| final long lastValue = constructHLLValue(LOG2M, (1 << LOG2M) - 1, 2); |
| cumulativeAddLine(output, hll, lastValue, schemaVersion); |
| |
| for(int i=2; i<(SPARSE_THRESHOLD + 2); i++) { |
| final long middleValue = constructHLLValue(LOG2M, i, 1); |
| |
| cumulativeAddLine(output, hll, middleValue, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Unions an EMPTY accumulator with EXPLICIT HLLs, each containing a |
| * single random value. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - EMPTY U EXPLICIT |
| * - EXPLICIT U EXPLICIT |
| * - EXPLICIT to SPARSE promotion |
| * - SPARSE U EXPLICIT |
| */ |
| private static void explicitPromotionTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "explicit_promotion", TestType.UNION); |
| |
| final Random random = new Random(randomLong()); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL = newHLL(HLLType.EMPTY); |
| cumulativeUnionLine(output, hll, emptyHLL, schemaVersion); |
| |
| for(int i=0; i<(EXPLICIT_THRESHOLD+500)/*should be greater than promotion cutoff*/; i++) { |
| // make an EXPLICIT set and populate with cardinality 1 |
| final HLL explicitHLL = newHLL(HLLType.EXPLICIT); |
| explicitHLL.addRaw(random.nextLong()); |
| |
| cumulativeUnionLine(output, hll, explicitHLL, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Unions an EMPTY accumulator with SPARSE HLLs, each |
| * having one register set. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - EMPTY U SPARSE |
| * - SPARSE U SPARSE |
| * - SPARSE promotion |
| * - SPARSE U FULL |
| */ |
| private static void sparseProbabilisticPromotionTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "sparse_promotion", TestType.UNION); |
| |
| final Random random = new Random(randomLong()); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL = newHLL(HLLType.EMPTY); |
| cumulativeUnionLine(output, hll, emptyHLL, schemaVersion); |
| |
| |
| for(int i=0; i<(SPARSE_THRESHOLD + 1000)/*should be greater than promotion cutoff*/; i++) { |
| // make a SPARSE set and populate with cardinality 1 |
| final HLL sparseHLL = newHLL(HLLType.SPARSE); |
| |
| final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT; |
| final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1); |
| final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue); |
| sparseHLL.addRaw(rawValue); |
| |
| cumulativeUnionLine(output, hll, sparseHLL, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Unions an EMPTY accumulator with EXPLICIT HLLs, each having a single |
| * random value, twice in a row to verify that the set properties are |
| * satisfied. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - EMPTY U EXPLICIT |
| * - EXPLICIT U EXPLICIT |
| */ |
| private static void explicitOverlapTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "explicit_explicit", TestType.UNION); |
| |
| final Random random = new Random(randomLong()); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL = newHLL(HLLType.EMPTY); |
| |
| cumulativeUnionLine(output, hll, emptyHLL, schemaVersion); |
| |
| for(int i=0; i<EXPLICIT_THRESHOLD; i++) { |
| // make an EXPLICIT set and populate with cardinality 1 |
| final HLL explicitHLL = newHLL(HLLType.EXPLICIT); |
| explicitHLL.addRaw(random.nextLong()); |
| |
| // union it into the accumulator twice, to test overlap (cardinality should not change) |
| cumulativeUnionLine(output, hll, explicitHLL, schemaVersion); |
| cumulativeUnionLine(output, hll, explicitHLL, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Unions an EMPTY accumulator with SPARSE HLLs, each |
| * having a single register set, twice in a row to verify that the set |
| * properties are satisfied. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - EMPTY U SPARSE |
| * - SPARSE U SPARSE |
| */ |
| private static void sparseProbabilisticOverlapTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "sparse_sparse", TestType.UNION); |
| |
| final Random random = new Random(randomLong()); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL = newHLL(HLLType.EMPTY); |
| |
| cumulativeUnionLine(output, hll, emptyHLL, schemaVersion); |
| |
| for(int i=0; i<SPARSE_THRESHOLD; i++) { |
| // make a SPARSE set and populate with cardinality 1 |
| final HLL sparseHLL = newHLL(HLLType.SPARSE); |
| final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT; |
| final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1); |
| final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue); |
| sparseHLL.addRaw(rawValue); |
| |
| cumulativeUnionLine(output, hll, sparseHLL, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Unions an EMPTY accumulator with FULL HLLs, each having |
| * many registers set, twice in a row to verify that the set properties are |
| * satisfied. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - EMPTY U FULL |
| * - FULL U FULL |
| */ |
| private static void probabilisticUnionTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "probabilistic_probabilistic", TestType.UNION); |
| |
| final Random random = new Random(randomLong()); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL = newHLL(HLLType.EMPTY); |
| cumulativeUnionLine(output, hll, emptyHLL, schemaVersion); |
| |
| for(int i=0; i<1000/*number of rows to generate*/; i++) { |
| // make a FULL set and populate with |
| final HLL fullHLL = newHLL(HLLType.FULL); |
| final int elementCount = random.nextInt(10000/*arbitrary maximum cardinality*/); |
| for(int j=0;j<elementCount;j++) { |
| fullHLL.addRaw(random.nextLong()); |
| } |
| |
| cumulativeUnionLine(output, hll, fullHLL, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| /** |
| * Unions an EMPTY accumulator with random HLLs. |
| * |
| * Format: cumulative union |
| * Tests: |
| * - hopefully all union possibilities |
| */ |
| private static void globalUnionTest(final ISchemaVersion schemaVersion) throws IOException { |
| final Writer output = openOutput(schemaVersion, "comprehensive", TestType.UNION); |
| |
| // the accumulator, starts empty |
| final HLL hll = newHLL(HLLType.EMPTY); |
| final HLL emptyHLL = newHLL(HLLType.EMPTY); |
| |
| cumulativeUnionLine(output, hll, emptyHLL, schemaVersion); |
| |
| for(int i=0; i<1000/*number of rows to generate*/; i++) { |
| final HLL randomHLL = generateRandomHLL(); |
| cumulativeUnionLine(output, hll, randomHLL, schemaVersion); |
| } |
| |
| output.flush(); |
| output.close(); |
| } |
| |
| // ======================================================================== |
| // Main |
| public static void fullSuite(final ISchemaVersion schemaVersion) throws IOException { |
| fullCardinalityCorrectionTest(schemaVersion); |
| globalUnionTest(schemaVersion); |
| globalStepTest(schemaVersion); |
| probabilisticUnionTest(schemaVersion); |
| explicitPromotionTest(schemaVersion); |
| explicitOverlapTest(schemaVersion); |
| sparseFullRepresentationTest(schemaVersion); |
| sparseStepTest(schemaVersion); |
| sparseRandomTest(schemaVersion); |
| sparseEdgeTest(schemaVersion); |
| sparseProbabilisticPromotionTest(schemaVersion); |
| sparseProbabilisticOverlapTest(schemaVersion); |
| } |
| |
| public static void main(String[] args) throws IOException { |
| fullSuite(SerializationUtil.VERSION_ONE); |
| } |
| |
| // ************************************************************************ |
| // Helpers |
| /** |
| * Shortcut for testing constructor, which uses the constants defined at |
| * the top of the file as default parameters. |
| * |
| * @return a new {@link HLL} of specified type, which uses the parameters |
| * ({@link #LOG2M}, {@link #REGWIDTH}, {@link #EXPLICIT_THRESHOLD}, |
| * and {@link #SPARSE_THRESHOLD}) specified above. |
| */ |
| private static HLL newHLL(final HLLType type) { |
| return newHLL(type); |
| } |
| |
| /** |
| * Returns the algorithm-specific cardinality of the specified {@link HLL} |
| * as a {@link String} appropriate for comparison with the algorithm-specific |
| * cardinality provided by the PostgreSQL implementation. |
| * |
| * @param hll the HLL whose algorithm-specific cardinality is to be printed. |
| * This cannot be <code>null</code>. |
| * @return the algorithm-specific cardinality of the instance as a PostgreSQL- |
| * compatible String. This will never be <code>null</code> |
| */ |
| private static String stringCardinality(final HLL hll) { |
| switch(hll.getType()) { |
| case EMPTY: |
| return "0"; |
| case EXPLICIT:/*promotion has not yet occurred*/ |
| return Long.toString(hll.cardinality()); |
| case SPARSE: |
| return Double.toString(hll.sparseProbabilisticAlgorithmCardinality()); |
| case FULL: |
| return Double.toString(hll.fullProbabilisticAlgorithmCardinality()); |
| default: |
| throw new RuntimeException("Unknown HLL type " + hll.getType()); |
| } |
| } |
| |
| /** |
| * Generates a random HLL and populates it with random values. |
| * |
| * @return the populated HLL. This will never be <code>null</code>. |
| */ |
| public static HLL generateRandomHLL() { |
| final int randomTypeInt = randomIntBetween(0, HLLType.values().length - 1); |
| final HLLType type; |
| switch(randomTypeInt) { |
| case 0: |
| type = HLLType.EMPTY; |
| break; |
| case 1: |
| type = HLLType.EXPLICIT; |
| break; |
| case 2: |
| type = HLLType.FULL; |
| break; |
| case 3: |
| type = HLLType.EMPTY; |
| break; |
| case 4: |
| type = HLLType.SPARSE; |
| break; |
| default: |
| throw new RuntimeException("Unassigned type int " + randomTypeInt); |
| } |
| |
| final int cardinalityCap; |
| final int cardinalityBaseline; |
| |
| switch(type) { |
| case EMPTY: |
| return newHLL(HLLType.EMPTY); |
| case EXPLICIT: |
| cardinalityCap = EXPLICIT_THRESHOLD; |
| cardinalityBaseline = 1; |
| break; |
| case SPARSE: |
| cardinalityCap = SPARSE_THRESHOLD; |
| cardinalityBaseline = (EXPLICIT_THRESHOLD + 1); |
| break; |
| case FULL: |
| cardinalityCap = 100000; |
| cardinalityBaseline = (SPARSE_THRESHOLD*10); |
| break; |
| default: |
| throw new RuntimeException("We should never be here."); |
| } |
| |
| final HLL hll = newHLL(HLLType.EMPTY); |
| for(int i=0; i<cardinalityBaseline; i++) { |
| hll.addRaw(randomLong()); |
| } |
| for(int i=0; i<randomInt(cardinalityCap - cardinalityBaseline); i++) { |
| hll.addRaw(randomLong()); |
| } |
| |
| return hll; |
| } |
| |
| /** |
| * Opens a {@link Writer} and writes out an appropriate CSV header. |
| * |
| * @param schemaVersion Schema version of the output. This cannot be |
| * <code>null</code>. |
| * @param description Description string used to build the filename. |
| * This cannot be <code>null</code>. |
| * @param type {@link TestType type} of the test file to be written. |
| * This cannot be <code>null</code>. |
| * @return The opened {@link Writer writer}. This will never be <code>null</code>. |
| */ |
| private static Writer openOutput(final ISchemaVersion schemaVersion, final String description, final TestType type) throws IOException { |
| final String schemaVersionPrefix = "v"+ schemaVersion.schemaVersionNumber() + "_"; |
| final String header; |
| final String filename; |
| switch(type) { |
| case ADD: |
| header = "cardinality,raw_value,HLL\n"; |
| filename = schemaVersionPrefix + "cumulative_add_" + description + ".csv"; |
| break; |
| case UNION: |
| header = "cardinality,HLL,union_cardinality,union_HLL\n"; |
| filename = schemaVersionPrefix + "cumulative_union_" + description + ".csv"; |
| break; |
| default: |
| throw new RuntimeException("Unknown test type " + type); |
| } |
| |
| final Writer output = Files.newBufferedWriter( |
| Paths.get(OUTPUT_DIRECTORY, filename), StandardCharsets.UTF_8); |
| output.write(header); |
| output.flush(); |
| return output; |
| } |
| |
| /** |
| * Writes out a {@link TestType#ADD}-formatted test line. |
| * |
| * @param output The output {@link Writer writer}. This cannot be <code>null</code>. |
| * @param hll The "accumulator" HLL instance. This cannot be <code>null</code>. |
| * @param rawValue The raw value added to the HLL. |
| * @param schemaVersion the schema with which to serialize the HLLs. This cannot |
| * be <code>null</code>. |
| */ |
| private static void cumulativeAddLine(final Writer output, final HLL hll, final long rawValue, final ISchemaVersion schemaVersion) throws IOException { |
| hll.addRaw(rawValue); |
| final String accumulatorCardinality = stringCardinality(hll); |
| |
| output.write(accumulatorCardinality + "," + rawValue + "," + toByteA(hll, schemaVersion) + "\n"); |
| output.flush(); |
| } |
| |
| /** |
| * Writes an initial line for a {@link TestType#ADD}-formatted test. |
| * |
| * @param output The output {@link Writer writer}. This cannot be <code>null</code>. |
| * @param hll The "accumulator" HLL instance. This cannot be <code>null</code>. |
| * @param schemaVersion the schema with which to serialize the HLLs. This cannot |
| * be <code>null</code>. |
| */ |
| private static void initLineAdd(final Writer output, final HLL hll, final ISchemaVersion schemaVersion) throws IOException { |
| output.write(0 + "," + 0 + "," + toByteA(hll, schemaVersion) + "\n"); |
| output.flush(); |
| } |
| |
| /** |
| * Writes out a {@link TestType#UNION}-formatted test line. |
| * |
| * @param output The output {@link Writer writer}. This cannot be <code>null</code>. |
| * @param hll The "accumulator" HLL instance. This cannot be <code>null</code>. |
| * @param increment The "increment" HLL instance which will be unioned into |
| * the accumulator. This cannot be <code>null</code>. |
| * @param schemaVersion the schema with which to serialize the HLLs. This cannot |
| * be <code>null</code>. |
| */ |
| private static void cumulativeUnionLine(final Writer output, final HLL hll, final HLL increment, final ISchemaVersion schemaVersion) throws IOException { |
| hll.union(increment); |
| |
| final String incrementCardinality = stringCardinality(increment); |
| final String accumulatorCardinality = stringCardinality(hll); |
| output.write(incrementCardinality + "," + toByteA(increment, schemaVersion) + "," + accumulatorCardinality + "," + toByteA(hll, schemaVersion) + "\n"); |
| output.flush(); |
| } |
| |
| /** |
| * Serializes a HLL to Postgres 9 'bytea' hex-format, for CSV ingest. |
| * |
| * @param hll the HLL to serialize. This cannot be <code>null</code>. |
| * @param schemaVersion the schema with which to serialize the HLLs. This cannot |
| * be <code>null</code>. |
| * @return a PostgreSQL 'bytea' string representing the HLL. |
| */ |
| private static String toByteA(final HLL hll, final ISchemaVersion schemaVersion) { |
| final byte[] bytes = hll.toBytes(schemaVersion); |
| return ("\\x" + NumberUtil.toHex(bytes, 0, bytes.length)); |
| } |
| |
| /** |
| * Indicates what kind of test output a test will generate. |
| */ |
| private static enum TestType { |
| /** |
| * This type of test is characterized by values being added to an |
| * accumulator HLL whose serialized representation (after the value is added) |
| * is printed to each line along with the cardinality and added value. |
| */ |
| ADD, |
| /** |
| * This type of test is characterized by HLLs being unioned into an |
| * accumulator HLL whose serialized representation (after the HLL is |
| * union'd) is printed to each line along with the cardinalities and the |
| * serialized representation of the HLL union'd in. |
| */ |
| UNION; |
| } |
| } |