blob: 84d1c43d15e7e6f78898e5fa944033070a34540c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.util;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.pinot.spi.utils.Pairs;
import org.apache.pinot.spi.utils.Pairs.IntPair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
* Tests for SortedRangeIntersection utility class.
*
* We hardcoded or random generated some lists of sorted doc range pairs (inclusive), and inside each list, there is no
* overlap between two pairs. Then we compare the SortedRangeIntersection results with our simple set based brute force
* solution results to verify the correctness of SortedRangeIntersection utility class.
*/
public class SortedRangeIntersectionTest {
private static final Logger LOGGER = LoggerFactory.getLogger(SortedRangeIntersectionTest.class);
@Test
public void testSimple() {
List<IntPair> rangeSet1 = Arrays.asList(Pairs.intPair(0, 4), Pairs.intPair(6, 10));
List<IntPair> rangeSet2 = Arrays.asList(Pairs.intPair(4, 7), Pairs.intPair(8, 14));
List<List<IntPair>> newArrayList = Arrays.asList(rangeSet1, rangeSet2);
List<IntPair> intersectionPairs = SortedRangeIntersection.intersectSortedRangeSets(newArrayList);
// expected (4,4) (6,10)
Assert.assertEquals(intersectionPairs.size(), 2);
Assert.assertEquals(intersectionPairs.get(0), Pairs.intPair(4, 4));
Assert.assertEquals(intersectionPairs.get(1), Pairs.intPair(6, 10));
}
@Test
public void testComplex() {
String rangeSet1 = "[[9148,10636], [18560,21885], [29475,32972], [34313,37642], [38008,47157], [50962,53911], "
+ "[59240,68238], [72458,83087], [92235,97593], [100690,103101], [111708,120102], [123212,124718], "
+ "[127544,134012], [134701,141314], [144966,146889], [147206,156680], [163842,168747], [175241,179737], "
+ "[180669,184662], [192538,200004], [207255,211226], [217529,219152], [221319,228532], [230549,236908], "
+ "[237353,240840], [245097,251894], [253228,257447], [263986,268166], [272168,276416], [282756,285555], "
+ "[286030,289848], [293220,303828], [308259,317810], [320830,330498], [337606,345534], [354205,361367], "
+ "[365751,375129], [379830,382548], [390661,399509], [409031,415694], [421748,428011], [436729,442978], "
+ "[443187,448760], [454285,464404], [469128,471735], [475965,478758], [483038,491060], [496477,499410], "
+ "[502719,507364], [511478,515427], [521615,523897], [524251,529600], [530904,536822], [541666,543826], "
+ "[551652,555367], [561244,565874], [573934,582151], [587804,593424], [596533,599490], [601884,605244], "
+ "[610479,618173], [627032,630079], [633582,643323], [648357,658921], [662083,664340], [666519,677174], "
+ "[681524,687223], [693032,696329], [700808,705461], [709573,713092], [722500,732846], [733115,741189], "
+ "[742183,743217], [748442,754700], [760482,768791], [769875,773877], [774153,775538], [778521,781333], "
+ "[781945,791595], [799389,809167], [811769,814445], [824160,831582], [838445,844533], [850597,858212], "
+ "[862638,867759], [877243,887468], [893193,895091], [902608,908295], [911058,915872], [916127,917590], "
+ "[922702,933633], [938082,946932], [953197,956096], [965980,970314], [976357,983182], [983378,991764]]";
String rangeSet2 = "[[4615,7365], [9048,19300], [25607,33900], [37975,45734], [53195,58803], [64401,72246], "
+ "[76305,84289], [86575,96695], [104465,114232], [121799,124496], [132587,137518], [146406,152055], "
+ "[154808,159304], [165855,168693], [177387,184548], [192275,202089], [204700,215167], [218780,219934], "
+ "[220492,224530], [227195,231541], [233667,241692], [249043,251396], [258494,263095], [271187,272880], "
+ "[279871,287604], [295906,302319], [302575,309352], [318221,320436], [324492,326129], [326623,333708], "
+ "[340839,349361], [356638,361977], [368099,373667], [374773,377525], [378682,380033], [387254,396509], "
+ "[405096,411616], [421132,424029], [426427,435377], [442540,447244], [453501,459620], [462366,471360], "
+ "[473395,484316], [492462,502422], [503755,507454], [507551,510017], [511176,516561], [522063,525977], "
+ "[531770,537861], [540539,542996], [547329,557420], [560641,570186], [570284,578197], [583861,588606], "
+ "[591016,596724], [601714,610872], [614557,622940], [632723,634975], [636710,647340], [647937,657306], "
+ "[666519,671699], [673434,679252], [679505,687724], [695809,697606], [705905,710503], [719044,728326], "
+ "[735262,739796], [748048,753094], [762698,768074], [771762,781103], [786979,789938], [790140,794143], "
+ "[800910,806993], [807930,811850], [818716,827521], [828786,839104], [840596,850617], [851100,858980], "
+ "[863671,874042], [874432,880240], [889917,897380], [897599,904508], [910935,914564], [919538,927762], "
+ "[933690,942122], [951330,959747], [969266,978196], [984965,991648]]";
String rangeSet3 = "[[3987,12147], [16890,26115], [34621,41095], [41476,44775], [50031,51695], [56539,60977], "
+ "[67558,76247], [83873,86817], [94900,102884], [111390,119073], [127792,138059], [145130,148633], "
+ "[155709,157847], [158118,161503], [164296,165881], [166123,169365], [178304,182866], [191004,193315], "
+ "[198648,199930], [204806,209054], [209177,213999], [214843,219858], [221497,223059], [228746,236624], "
+ "[241482,244679], [254433,255982], [257794,260287], [270207,277022], [278621,286999], [296719,303032], "
+ "[310590,318040], [323645,333283], [336166,341711], [347485,352255], [353348,358126], [362660,368061], "
+ "[376141,381147], [390178,393612], [399003,401626], [402609,412126], [419426,423852], [430009,432359], "
+ "[436647,442494], [446986,453904], [457694,461029], [466339,474088], [483026,486699], [495143,499556], "
+ "[506900,510220], [516325,521843], [523249,528070], [528549,532543], [533418,538998], [547895,549483], "
+ "[554460,555634], [562049,569098], [576463,584537], [588353,590300], [595284,600900], [609006,610452], "
+ "[618857,627533], [633186,637955], [642446,650849], [655326,662913], [663654,673388], [673914,678458], "
+ "[685951,690391], [699505,707103], [715822,718387], [725073,735513], [739306,741642], [750077,752683], "
+ "[759644,768461], [775885,778884], [783314,785772], [792568,802284], [806644,813017], [821962,828876], "
+ "[837176,847172], [854336,864364], [874180,881592], [888633,899508], [906913,907927], [908291,918558], "
+ "[927183,929930], [939548,949063], [951579,957887], [958917,960053], [968720,973220], [978731,989441]]";
String expectedOutputRangeSet = "[[9148,10636], [18560,19300], [38008,41095], [41476,44775], [67558,68238], "
+ "[94900,96695], [111708,114232], [132587,134012], [134701,137518], [146406,146889], [147206,148633], "
+ "[155709,156680], [165855,165881], [166123,168693], [178304,179737], [180669,182866], [192538,193315], "
+ "[198648,199930], [207255,209054], [209177,211226], [218780,219152], [221497,223059], [230549,231541], "
+ "[233667,236624], [272168,272880], [282756,285555], [286030,286999], [296719,302319], [302575,303032], "
+ "[324492,326129], [326623,330498], [340839,341711], [356638,358126], [379830,380033], [390661,393612], "
+ "[409031,411616], [421748,423852], [446986,447244], [457694,459620], [469128,471360], [483038,484316], "
+ "[496477,499410], [506900,507364], [523249,523897], [524251,525977], [531770,532543], [533418,536822], "
+ "[554460,555367], [562049,565874], [576463,578197], [588353,588606], [596533,596724], [633582,634975], "
+ "[636710,637955], [642446,643323], [648357,650849], [655326,657306], [666519,671699], [673914,677174], "
+ "[685951,687223], [725073,728326], [735262,735513], [739306,739796], [750077,752683], [762698,768074], "
+ "[778521,778884], [800910,802284], [806644,806993], [807930,809167], [811769,811850], [824160,827521], "
+ "[828786,828876], [838445,839104], [840596,844533], [854336,858212], [863671,864364], [877243,880240], "
+ "[893193,895091], [911058,914564], [927183,927762], [939548,942122], [953197,956096], [969266,970314], "
+ "[984965,989441]]";
List<List<IntPair>> sortedRangeSetList =
Arrays.asList(constructRangeSet(rangeSet1), constructRangeSet(rangeSet2), constructRangeSet(rangeSet3));
List<IntPair> expected = constructRangeSet(expectedOutputRangeSet);
List<IntPair> actual = SortedRangeIntersection.intersectSortedRangeSets(sortedRangeSetList);
Assert.assertEquals(actual, expected);
}
List<IntPair> constructRangeSet(String formattedString) {
formattedString = formattedString.replace('[', ' ');
formattedString = formattedString.replace(']', ' ');
String[] splits = formattedString.split(",");
int length = splits.length;
Preconditions.checkState(length % 2 == 0);
List<IntPair> pairs = new ArrayList<>();
for (int i = 0; i < length; i += 2) {
int start = Integer.parseInt(splits[i].trim());
int end = Integer.parseInt(splits[i + 1].trim());
pairs.add(Pairs.intPair(start, end));
}
return pairs;
}
@Test
public void testRandom() {
int totalDocs = 1_000_000;
int maxRange = 10000;
int minRange = 1000;
long randomSeed = System.currentTimeMillis();
Random r = new Random(randomSeed);
int numLists = 3;
List<List<IntPair>> sortedRangePairsList = new ArrayList<>();
List<Set<Integer>> rawIdSetList = new ArrayList<>();
for (int i = 0; i < numLists; i++) {
List<IntPair> pairs = new ArrayList<>();
Set<Integer> rawIdSet = new HashSet<>();
int docId = 0;
while (docId < totalDocs) {
int start = docId + r.nextInt(maxRange);
int end = start + Math.max(minRange, r.nextInt(maxRange));
if (end < totalDocs) {
pairs.add(Pairs.intPair(start, end));
for (int id = start; id <= end; id++) {
rawIdSet.add(id);
}
}
docId = end + 1;
}
sortedRangePairsList.add(pairs);
rawIdSetList.add(rawIdSet);
}
// expected intersection
List<IntPair> expected = new ArrayList<>();
int tempStart = -1;
for (int id = 0; id < totalDocs; id++) {
boolean foundInAll = true;
for (int i = 0; i < numLists; i++) {
if (!rawIdSetList.get(i).contains(id)) {
foundInAll = false;
break;
}
}
if (foundInAll) {
if (tempStart == -1) {
tempStart = id;
}
} else {
if (tempStart != -1) {
expected.add(Pairs.intPair(tempStart, id - 1));
tempStart = -1;
}
}
}
List<IntPair> actual = SortedRangeIntersection.intersectSortedRangeSets(sortedRangePairsList);
if (!actual.equals(expected)) {
LOGGER.error("Actual pairs not equal to expected pairs.");
LOGGER.error("Actual pairs: {}", actual);
LOGGER.error("Expected pairs: {}", expected);
LOGGER.error("Random seed: {}", randomSeed);
LOGGER.error("Sorted range pairs list: {}", sortedRangePairsList);
Assert.fail();
}
}
}