blob: e946450c9042653a1c4103e9d94aff01b4c8dfe9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.index.bloom;
import org.apache.hudi.common.util.collection.Pair;
import org.junit.jupiter.api.Test;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Unit tests for {@link BucketizedBloomCheckPartitioner}.
 *
 * <p>The partitioner is constructed from a requested partition count, a map of file id to
 * number of key comparisons, and a third numeric argument. Judging by the assertions below
 * (40 comparisons with an argument of 10 yields 4 buckets, 35 yields 4, 20 yields 2), that third
 * argument acts as the number of keys handled per bucket -- NOTE(review): confirm against the
 * partitioner's constructor.
 */
public class TestBucketizedBloomCheckPartitioner {

  /**
   * Builds a comparisons map in which files {@code f0 .. f(numFiles - 1)} all carry the same load.
   *
   * <p>A plain {@link HashMap} is populated instead of using double-brace initialization, which
   * would create an anonymous {@code HashMap} subclass holding a reference to the test instance.
   *
   * @param numFiles           number of file entries to create
   * @param comparisonsPerFile comparison count assigned to every file
   * @return a new mutable map from file id to comparison count
   */
  private static Map<String, Long> uniformComparisons(int numFiles, long comparisonsPerFile) {
    Map<String, Long> comparisons = new HashMap<>();
    for (int i = 0; i < numFiles; i++) {
      comparisons.put("f" + i, comparisonsPerFile);
    }
    return comparisons;
  }

  /**
   * Checks both the number of buckets each file receives and the exact partition ids those
   * buckets are assigned to, for three files of differing load spread over 4 partitions.
   */
  @Test
  public void testAssignmentCorrectness() {
    Map<String, Long> fileToComparisons = new HashMap<>();
    fileToComparisons.put("f1", 40L);
    fileToComparisons.put("f2", 35L);
    fileToComparisons.put("f3", 20L);

    BucketizedBloomCheckPartitioner p = new BucketizedBloomCheckPartitioner(4, fileToComparisons, 10);
    Map<String, List<Integer>> assignments = p.getFileGroupToPartitions();

    assertEquals(4, assignments.get("f1").size(), "f1 should have 4 buckets");
    assertEquals(4, assignments.get("f2").size(), "f2 should have 4 buckets");
    assertEquals(2, assignments.get("f3").size(), "f3 should have 2 buckets");

    assertArrayEquals(new Integer[] {0, 0, 1, 3}, assignments.get("f1").toArray(), "f1 spread across 3 partitions");
    assertArrayEquals(new Integer[] {1, 2, 2, 0}, assignments.get("f2").toArray(), "f2 spread across 3 partitions");
    assertArrayEquals(new Integer[] {3, 1}, assignments.get("f3").toArray(), "f3 spread across 2 partitions");
  }

  /**
   * With 10 files of 10 buckets each and 100 requested partitions, every partition that
   * appears in the assignment must hold exactly one bucket.
   */
  @Test
  public void testUniformPacking() {
    // evenly distribute 10 buckets/file across 100 partitions
    Map<String, Long> comparisons1 = uniformComparisons(10, 100L);
    BucketizedBloomCheckPartitioner partitioner = new BucketizedBloomCheckPartitioner(100, comparisons1, 10);
    Map<String, List<Integer>> assignments = partitioner.getFileGroupToPartitions();

    assignments.forEach((file, partitions) -> assertEquals(10, partitions.size()));

    // Count how many buckets landed on each partition id, across all files.
    Map<Integer, Long> partitionToNumBuckets = assignments.values().stream()
        .flatMap(List::stream)
        .collect(Collectors.groupingBy(partition -> partition, Collectors.counting()));
    partitionToNumBuckets.forEach((partition, numBuckets) -> assertEquals(1L, numBuckets.longValue()));
  }

  /**
   * Requesting far more partitions (10000) than there are buckets (10 files x 10 buckets)
   * must result in a partition count equal to the total number of buckets.
   */
  @Test
  public void testNumPartitions() {
    Map<String, Long> comparisons1 = uniformComparisons(10, 100L);
    BucketizedBloomCheckPartitioner p = new BucketizedBloomCheckPartitioner(10000, comparisons1, 10);
    assertEquals(100, p.numPartitions(), "num partitions must equal total buckets");
  }

  /**
   * Every lookup through {@code getPartition} must return a valid partition index, i.e. one in
   * {@code [0, numPartitions())}.
   */
  @Test
  public void testGetPartitions() {
    Map<String, Long> comparisons1 = uniformComparisons(100000, 100L);
    BucketizedBloomCheckPartitioner p = new BucketizedBloomCheckPartitioner(1000, comparisons1, 10);
    int numPartitions = p.numPartitions();

    IntStream.range(0, 100000).forEach(f -> {
      int partition = p.getPartition(Pair.of("f" + f, "value"));
      // Upper bound is exclusive: an index equal to the partition count is already out of range.
      assertTrue(0 <= partition && partition < numPartitions, "partition is out of range: " + partition);
    });
  }
}