/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.handler.admin;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.cloud.api.collections.SplitByPrefixTest;
import org.apache.solr.cloud.api.collections.SplitByPrefixTest.Prefix;
import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;

// Tests the low-level splitByPrefix range recommendations.
// This test lives in this package so it can access package-private methods.
// See SplitByPrefixTest for cloud-level tests of SPLITSHARD that use this code by passing getRanges with the SPLIT command.
public class SplitHandlerTest extends SolrTestCaseJ4 {

@BeforeClass
public static void beforeTests() throws Exception {
System.setProperty("managed.schema.mutable", "true"); // needed by cloud-managed config set
initCore("solrconfig.xml","schema_latest.xml");
}
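
  // Asserts that the recommended split ranges exactly tile currentRange: the first range starts at
  // currentRange.min, each subsequent range starts immediately after the previous one ends, and the
  // last range ends at currentRange.max.  A null result (no recommendation) is accepted as-is.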
void verifyContiguous(Collection<DocRouter.Range> results, DocRouter.Range currentRange) {
if (results == null) return;
assertTrue(results.size() > 1);
DocRouter.Range prev = null;
for (DocRouter.Range range : results) {
if (prev == null) {
// first range
assertEquals(range.min, currentRange.min);
} else {
// make sure produced ranges are contiguous
assertEquals(range.min, prev.max + 1);
}
prev = range;
}
assertEquals(prev.max, currentRange.max);
}

  // random bound biased toward the special values Integer.MIN_VALUE, Integer.MAX_VALUE, and 0 (and values near them)
int randomBound(Random rand) {
int ret = 0;
switch(rand.nextInt(10)) {
case 0: ret = Integer.MIN_VALUE; break;
case 1: ret = Integer.MAX_VALUE; break;
case 2: ret = 0; break;
default: ret = rand.nextInt();
}
if (rand.nextBoolean()) {
ret += rand.nextInt(2000) - 1000;
}
return ret;
}

  @Test
public void testRandomSplitRecommendations() throws Exception {
Random rand = random();
    for (int i = 0; i < 10000; i++) { // cheap: even 1M iterations only take about a second
doRandomSplitRecommendation(rand);
}
}
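
  // Builds a random shard range and a random, ordered, non-overlapping histogram of RangeCounts
  // (possibly with gaps, and possibly extending outside the shard range), then asks
  // SplitOp.getSplits for a recommendation and verifies that any result is contiguous.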
public void doRandomSplitRecommendation(Random rand) throws Exception {
int low = 0;
int high = 0;
while (high-low < 10) {
low = randomBound(rand);
high = randomBound(rand);
if (low > high) {
int tmp = low;
low = high;
high = tmp;
}
}
DocRouter.Range curr = new DocRouter.Range(low,high);
int maxRanges = rand.nextInt(20);
int start = low;
    // the first bucket doesn't have to start exactly at the beginning of the shard range
if (rand.nextBoolean()) {
start += rand.nextInt(200) - 100;
if (start > low) {
// underflow
start = Integer.MIN_VALUE;
}
}
List<SplitOp.RangeCount> counts = new ArrayList<>(maxRanges);
for (;;) {
int end = start + rand.nextInt(100) + 1;
if (end < start) {
// overflow
end = Integer.MAX_VALUE;
}
counts.add( new SplitOp.RangeCount(new DocRouter.Range(start, end), rand.nextInt(1000)+1));
if (counts.size() >= maxRanges) break;
if (counts.size() == maxRanges / 2 && rand.nextBoolean()) {
// transition toward the end of the range (more boundary cases for large ranges)
start = high - rand.nextInt(100);
start = Math.max(start, end+1);
} else {
start = end + 1;
}
if (rand.nextBoolean()) {
start += rand.nextInt(100);
}
if (start < end) {
// overflow
break;
}
}
try {
Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
verifyContiguous(results, curr);
    } catch (Throwable e) {
      // Ignore: a randomly generated histogram may not be valid input for getSplits.
      // Note that this also swallows any assertion failures from verifyContiguous above.
    }
}

  @Test
public void testSplitRecommendations() throws Exception {
// split whole range exactly in two
DocRouter.Range curr = new DocRouter.Range(10,15);
List<SplitOp.RangeCount> counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,15), 100));
Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
assertEquals(12, results.iterator().next().max);
verifyContiguous(results, curr);

    // make sure the range containing the docs is split in half, even if the shard's current range is bigger
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,15), 100));
results = SplitOp.getSplits(counts, curr);
assertEquals(12, results.iterator().next().max);
verifyContiguous(results, curr);

    // don't freak out if we encounter ranges outside of the shard's currently defined range;
    // this can happen since document routing can be overridden.
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(-1000,-990), 100));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(-980,-970), 2));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,15), 100));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(1000,1010), 5));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(1020,1030), 7));
results = SplitOp.getSplits(counts, curr);
assertEquals(12, results.iterator().next().max);
verifyContiguous(results, curr);

    // Splitting counts of [1,4,3] should result in [1,4],[3],
    // and splitting counts of [3,4,1] should result in [3],[4,1].
    // The current implementation has specific code for the latter case (hence this is needed for code coverage).
    // The random tests *should* catch this as well, though.
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 4));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 3));
results = SplitOp.getSplits(counts, curr);
assertEquals(19, results.iterator().next().max);
verifyContiguous(results, curr);
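
    // now the [3,4,1] case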
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 3));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 4));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 1));
results = SplitOp.getSplits(counts, curr);
assertEquals(9, results.iterator().next().max);
verifyContiguous(results, curr);

    // test the case where the largest count comes first
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 4));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 1));
results = SplitOp.getSplits(counts, curr);
assertEquals(9, results.iterator().next().max);
verifyContiguous(results, curr);

    // test the case where the largest count comes last
    // (this has specific code since we don't get past the midpoint until the last range, and then need to back up)
curr = new DocRouter.Range(-100,101);
counts = new ArrayList<>();
counts.add(new SplitOp.RangeCount(new DocRouter.Range(0,9), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(10,19), 1));
counts.add(new SplitOp.RangeCount(new DocRouter.Range(20,29), 4));
results = SplitOp.getSplits(counts, curr);
assertEquals(19, results.iterator().next().max);
verifyContiguous(results, curr);
}
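
  // Indexes small random batches of documents whose ids start with known prefixes and checks that
  // the histogram computed from the separate prefix field (getHashHistogram) always matches the one
  // computed directly from the id field (getHashHistogramFromId), starting with an empty index.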
@Test
public void testHistogramBuilding() throws Exception {
List<Prefix> prefixes = SplitByPrefixTest.findPrefixes(20, 0, 0x00ffffff);
List<Prefix> uniquePrefixes = SplitByPrefixTest.removeDups(prefixes);
assertTrue(prefixes.size() > uniquePrefixes.size()); // make sure we have some duplicates to test hash collisions
String prefixField = "id_prefix_s";
String idField = "id";
DocRouter router = new CompositeIdRouter();
for (int i=0; i<100; i++) {
SolrQueryRequest req = req("myquery");
try {
// the first time through the loop we do this before adding docs to test an empty index
Collection<SplitOp.RangeCount> counts1 = SplitOp.getHashHistogram(req.getSearcher(), prefixField, router, null);
Collection<SplitOp.RangeCount> counts2 = SplitOp.getHashHistogramFromId(req.getSearcher(), idField, router, null);
assertTrue(eqCount(counts1, counts2));
if (i>0) {
assertTrue(counts1.size() > 0); // make sure we are testing something
}
// index a few random documents
int ndocs = random().nextInt(10) + 1;
for (int j=0; j<ndocs; j++) {
String prefix = prefixes.get( random().nextInt(prefixes.size()) ).key;
if (random().nextBoolean()) {
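            // occasionally extend the prefix with an extra "N!" component to exercise multi-level composite-id routing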
prefix = prefix + Integer.toString(random().nextInt(3)) + "!";
}
String id = prefix + "doc" + i + "_" + j;
updateJ(jsonAdd(sdoc(idField, id, prefixField, prefix)), null);
}
assertU(commit());
} finally {
req.close();
}
}
}
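
  // Order-sensitive comparison of two RangeCount collections: equal iff they have the same size
  // and the ranges and counts match pairwise in iteration order.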
private boolean eqCount(Collection<SplitOp.RangeCount> a, Collection<SplitOp.RangeCount> b) {
if (a.size() != b.size()) {
return false;
}
Iterator<SplitOp.RangeCount> it1 = a.iterator();
Iterator<SplitOp.RangeCount> it2 = b.iterator();
while (it1.hasNext()) {
SplitOp.RangeCount r1 = it1.next();
SplitOp.RangeCount r2 = it2.next();
if (!r1.range.equals(r2.range) || r1.count != r2.count) {
return false;
}
}
return true;
}
}