/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.handler.admin;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.cloud.api.collections.SplitByPrefixTest;
import org.apache.solr.cloud.api.collections.SplitByPrefixTest.Prefix;
import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;

// Tests the low-level splitByPrefix range recommendations.
// This test lives in this package so it can access package-private methods.
// See SplitByPrefixTest for cloud-level tests of SPLITSHARD that use this code by passing getRanges with the SPLIT command.
public class SplitHandlerTest extends SolrTestCaseJ4 {

@BeforeClass
public static void beforeTests() throws Exception {
System.setProperty("managed.schema.mutable", "true"); // needed by cloud-managed config set
initCore("solrconfig.xml","schema_latest.xml");
}
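
  // Asserts that the recommended split ranges exactly tile currentRange: the first range starts at
  // currentRange.min, each subsequent range starts immediately after the previous one ends, and the
  // last range ends at currentRange.max.  A null result (no recommendation) is accepted as-is.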
void verifyContiguous(Collection<DocRouter.Range> results, DocRouter.Range currentRange) {
if (results == null) return;
assertTrue(results.size() > 1);
DocRouter.Range prev = null;
for (DocRouter.Range range : results) {
if (prev == null) {
// first range
assertEquals(range.min, currentRange.min);
} else {
// make sure produced ranges are contiguous
assertEquals(range.min, prev.max + 1);
}
prev = range;
}
assertEquals(prev.max, currentRange.max);
}

  // random bound biased toward the special values Integer.MIN_VALUE, Integer.MAX_VALUE, and 0 (and values near them)
int randomBound(Random rand) {
int ret = 0;
switch(rand.nextInt(10)) {
case 0: ret = Integer.MIN_VALUE; break;
case 1: ret = Integer.MAX_VALUE; break;
case 2: ret = 0; break;
default: ret = rand.nextInt();
}
if (rand.nextBoolean()) {
ret += rand.nextInt(2000) - 1000;
}
return ret;
}

  @Test
public void testRandomSplitRecommendations() throws Exception {
Random rand = random();
    for (int i = 0; i < 10000; i++) { // cheap: even 1M iterations only take about a second
doRandomSplitRecommendation(rand);
}
}
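
  // Builds a random shard range and a random, ordered, non-overlapping histogram of RangeCounts
  // (possibly with gaps, and possibly extending outside the shard range), then asks
  // SplitOp.getSplits for a recommendation and verifies that any result is contiguous.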
public void doRandomSplitRecommendation(Random rand) throws Exception {
int low = 0;
int high = 0;
while (high-low < 10) {
low = randomBound(rand);
high = randomBound(rand);
if (low > high) {
int tmp = low;
low = high;
high = tmp;
}
}
DocRouter.Range curr = new DocRouter.Range(low,high);
int maxRanges = rand.nextInt(20);
int start = low;
    // the first bucket doesn't have to start exactly at the beginning of the shard range
if (rand.nextBoolean()) {
start += rand.nextInt(200) - 100;
if (start > low) {
// underflow
start = Integer.MIN_VALUE;
}
}
List<SplitOp.RangeCount> counts = new ArrayList<>(maxRanges);
for (;;) {
int end = start + rand.nextInt(100) + 1;
if (end < start) {
// overflow
end = Integer.MAX_VALUE;
}
counts.add( new SplitOp.RangeCount(new DocRouter.Range(start, end), rand.nextInt(1000)+1));
if (counts.size() >= maxRanges) break;
if (counts.size() == maxRanges / 2 && rand.nextBoolean()) {
// transition toward the end of the range (more boundary cases for large ranges)
start = high - rand.nextInt(100);
start = Math.max(start, end+1);
} else {
start = end + 1;
}
if (rand.nextBoolean()) {
start += rand.nextInt(100);
}
if (start < end) {
// overflow
break;
}
}
try {
Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
verifyContiguous(results, curr);
    } catch (Throwable e) {
      // Ignore: a randomly generated histogram may not be valid input for getSplits.
      // Note that this also swallows any assertion failures from verifyContiguous above.
    }
}

  @Test
public void testSplitRecommendations() throws Exception {
// split whole range exactly in two
DocRouter.Range curr = new DocRouter.Range(10,15);
List<SplitOp.RangeCount> counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,15), 100));
Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
assertEquals(12, results.iterator().next().max);
verifyContiguous(results, curr);

    // make sure the range containing the docs is split in half, even if the shard's current range is bigger
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,15), 100));
results = SplitOp.getSplits(counts, curr);
assertEquals(12, results.iterator().next().max);
verifyContiguous(results, curr);

    // don't freak out if we encounter ranges outside of the shard's currently defined range;
    // this can happen since document routing can be overridden.
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(-1000,-990), 100));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(-980,-970), 2));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,15), 100));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(1000,1010), 5));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(1020,1030), 7));
results = SplitOp.getSplits(counts, curr);
assertEquals(12, results.iterator().next().max);
verifyContiguous(results, curr);

    // Splitting counts of [1,4,3] should result in [1,4],[3],
    // and splitting counts of [3,4,1] should result in [3],[4,1].
    // The current implementation has specific code for the latter case (hence this is needed for code coverage).
    // The random tests *should* catch this as well, though.
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 4));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 3));
results = SplitOp.getSplits(counts, curr);
assertEquals(19, results.iterator().next().max);
verifyContiguous(results, curr);
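
    // now the [3,4,1] case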
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 3));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 4));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 1));
results = SplitOp.getSplits(counts, curr);
assertEquals(9, results.iterator().next().max);
verifyContiguous(results, curr);

    // test the case where the largest count comes first
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 4));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 1));
results = SplitOp.getSplits(counts, curr);
assertEquals(9, results.iterator().next().max);
verifyContiguous(results, curr);

    // test the case where the largest count comes last
    // (this has specific code since we don't get past the midpoint until the last range, and then need to back up)
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 4));
results = SplitOp.getSplits(counts, curr);
assertEquals(19, results.iterator().next().max);
verifyContiguous(results, curr);
}
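
  // Indexes small random batches of documents whose ids start with known prefixes and checks that
  // the histogram computed from the separate prefix field (getHashHistogram) always matches the one
  // computed directly from the id field (getHashHistogramFromId), starting with an empty index.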
@Test
public void testHistogramBuilding() throws Exception {
List<Prefix> prefixes = SplitByPrefixTest.findPrefixes(20, 0, 0x00ffffff);
List<Prefix> uniquePrefixes = SplitByPrefixTest.removeDups(prefixes);
assertTrue(prefixes.size() > uniquePrefixes.size()); // make sure we have some duplicates to test hash collisions
String prefixField = "id_prefix_s";
String idField = "id";
DocRouter router = new CompositeIdRouter();
for (int i=0; i<100; i++) {
SolrQueryRequest req = req("myquery");
try {
// the first time through the loop we do this before adding docs to test an empty index
Collection<SplitOp.RangeCount> counts1 = SplitOp.getHashHistogram(req.getSearcher(), prefixField, router, null);
Collection<SplitOp.RangeCount> counts2 = SplitOp.getHashHistogramFromId(req.getSearcher(), idField, router, null);
assertTrue(eqCount(counts1, counts2));
if (i>0) {
assertTrue(counts1.size() > 0); // make sure we are testing something
}
// index a few random documents
int ndocs = random().nextInt(10) + 1;
for (int j=0; j<ndocs; j++) {
String prefix = prefixes.get( random().nextInt(prefixes.size()) ).key;
if (random().nextBoolean()) {
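            // occasionally extend the prefix with an extra "N!" component to exercise multi-level composite-id routing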
prefix = prefix + Integer.toString(random().nextInt(3)) + "!";
}
String id = prefix + "doc" + i + "_" + j;
updateJ(jsonAdd(sdoc(idField, id, prefixField, prefix)), null);
}
assertU(commit());
} finally {
req.close();
}
}
}
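
  // Order-sensitive comparison of two RangeCount collections: equal iff they have the same size
  // and the ranges and counts match pairwise in iteration order.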
private boolean eqCount(Collection<SplitOp.RangeCount> a, Collection<SplitOp.RangeCount> b) {
if (a.size() != b.size()) {
return false;
}
Iterator<SplitOp.RangeCount> it1 = a.iterator();
Iterator<SplitOp.RangeCount> it2 = b.iterator();
while (it1.hasNext()) {
SplitOp.RangeCount r1 = it1.next();
SplitOp.RangeCount r2 = it2.next();
if (!r1.range.equals(r2.range) || r1.count != r2.count) {
return false;
}
}
return true;
}
}