blob: ac337a0e5efc6591f62426e15926a03288d4993c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import java.util.HashSet;
import org.apache.solr.SolrTestCase;
import org.junit.Test;
import com.carrotsearch.hppc.LongHashSet;
import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
/**
* Tests {@link HLL} of type {@link HLLType#EXPLICIT}.
*/
public class ExplicitHLLTest extends SolrTestCase {
/**
* Tests basic set semantics of {@link HLL#addRaw(long)}.
*/
@Test
public void addBasicTest() {
{ // Adding a single positive value to an empty set should work.
final HLL hll = newHLL(128/*arbitrary*/);
hll.addRaw(1L/*positive*/);
assertEquals(hll.cardinality(), 1L);
}
{ // Adding a single negative value to an empty set should work.
final HLL hll = newHLL(128/*arbitrary*/);
hll.addRaw(-1L/*negative*/);
assertEquals(hll.cardinality(), 1L);
}
{ // Adding a duplicate value to a set should be a no-op.
final HLL hll = newHLL(128/*arbitrary*/);
hll.addRaw(1L/*positive*/);
assertEquals(hll.cardinality(), 1L/*arbitrary*/);
assertEquals(hll.cardinality(), 1L/*dupe*/);
}
}
// ------------------------------------------------------------------------
/**
* Tests {@link HLL#union(HLL)}.
*/
@Test
public void unionTest() {
{// Unioning two distinct sets should work
final HLL hllA = newHLL(128/*arbitrary*/);
final HLL hllB = newHLL(128/*arbitrary*/);
hllA.addRaw(1L);
hllA.addRaw(2L);
hllB.addRaw(3L);
hllA.union(hllB);
assertEquals(hllA.cardinality(), 3);
}
{// Unioning two sets whose union doesn't exceed the cardinality cap should not promote
final HLL hllA = newHLL(128/*arbitrary*/);
final HLL hllB = newHLL(128/*arbitrary*/);
hllA.addRaw(1L);
hllA.addRaw(2L);
hllB.addRaw(1L);
hllA.union(hllB);
assertEquals(hllA.cardinality(), 2);
}
{// unioning two sets whose union exceeds the cardinality cap should promote
final HLL hllA = newHLL(128/*arbitrary*/);
final HLL hllB = newHLL(128/*arbitrary*/);
// fill up sets to explicitThreshold
for(long i=0; i<128/*explicitThreshold*/; i++) {
hllA.addRaw(i);
hllB.addRaw(i + 128);
}
hllA.union(hllB);
assertEquals(hllA.getType(), HLLType.SPARSE);
}
}
// ------------------------------------------------------------------------
/**
* Tests {@link HLL#clear()}
*/
@Test
public void clearTest() {
final HLL hll = newHLL(128/*arbitrary*/);
hll.addRaw(1L);
assertEquals(hll.cardinality(), 1L);
hll.clear();
assertEquals(hll.cardinality(), 0L);
}
// ------------------------------------------------------------------------
/**
*/
@Test
public void toFromBytesTest() {
final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
final HLLType type = HLLType.EXPLICIT;
final int padding = schemaVersion.paddingBytes(type);
final int bytesPerWord = 8;
{// Should work on an empty set
final HLL hll = newHLL(128/*arbitrary*/);
final byte[] bytes = hll.toBytes(schemaVersion);
// assert output has correct byte length
assertEquals(bytes.length, padding/*no elements, just padding*/);
final HLL inHLL = HLL.fromBytes(bytes);
assertElementsEqual(hll, inHLL);
}
{// Should work on a partially filled set
final HLL hll = newHLL(128/*arbitrary*/);
for(int i=0; i<3; i++) {
hll.addRaw(i);
}
final byte[] bytes = hll.toBytes(schemaVersion);
// assert output has correct byte length
assertEquals(bytes.length, padding + (bytesPerWord * 3/*elements*/));
final HLL inHLL = HLL.fromBytes(bytes);
assertElementsEqual(hll, inHLL);
}
{// Should work on a full set
final int explicitThreshold = 128;
final HLL hll = newHLL(explicitThreshold);
for(int i=0; i<explicitThreshold; i++) {
hll.addRaw(27 + i/*arbitrary*/);
}
final byte[] bytes = hll.toBytes(schemaVersion);
// assert output has correct byte length
assertEquals(bytes.length, padding + (bytesPerWord * explicitThreshold/*elements*/));
final HLL inHLL = HLL.fromBytes(bytes);
assertElementsEqual(hll, inHLL);
}
}
// ------------------------------------------------------------------------
/**
* Tests correctness against {@link java.util.HashSet}.
*/
@Test
public void randomValuesTest() {
final int explicitThreshold = 4096;
final HashSet<Long> canonical = new HashSet<Long>();
final HLL hll = newHLL(explicitThreshold);
for(int i=0;i<explicitThreshold;i++){
long randomLong = randomLong();
canonical.add(randomLong);
hll.addRaw(randomLong);
}
final int canonicalCardinality = canonical.size();
assertEquals(hll.cardinality(), canonicalCardinality);
}
// ------------------------------------------------------------------------
/**
* Tests promotion to {@link HLLType#SPARSE} and {@link HLLType#FULL}.
*/
@Test
public void promotionTest() {
{ // locally scoped for sanity
final int explicitThreshold = 128;
final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold*/, HLLType.EXPLICIT);
for(int i=0;i<explicitThreshold + 1;i++){
hll.addRaw(i);
}
assertEquals(hll.getType(), HLLType.SPARSE);
}
{ // locally scoped for sanity
final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, 4/*expthresh => explicitThreshold = 8*/, false/*sparseon*/, HLLType.EXPLICIT);
for(int i=0;i<9/* > explicitThreshold */;i++){
hll.addRaw(i);
}
assertEquals(hll.getType(), HLLType.FULL);
}
}
// ************************************************************************
// assertion helpers
/**
* Asserts that values in both sets are exactly equal.
*/
private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
final LongHashSet internalSetA = hllA.explicitStorage;
final LongHashSet internalSetB = hllB.explicitStorage;
assertTrue(internalSetA.equals(internalSetB));
}
/**
* Builds a {@link HLLType#EXPLICIT} {@link HLL} instance with the specified
* explicit threshold.
*
* @param explicitThreshold explicit threshold to use for the constructed
* {@link HLL}. This must be greater than zero.
* @return a default-sized {@link HLLType#EXPLICIT} empty {@link HLL} instance.
* This will never be <code>null</code>.
*/
private static HLL newHLL(final int explicitThreshold) {
return new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold, arbitrary, unused*/, HLLType.EXPLICIT);
}
}