solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.search.facet;

 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.SolrTestUtil;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudHttp2SolrClient;
 import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
 import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicInteger;

 /**
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> function, that asserts the
  * accuracy of the results for all the buckets returned using verification queries of the (expected)
  * foreground &amp; background queries based on the nested facet terms.
  * <p>
  * Note that unlike normal facet "count" verification, using a high limit + overrequest isn't a substitute
  * for refinement in order to ensure accurate "skg" computation across shards.  For that reason, this
  * test forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifies a
  * <code>domain: { 'query':'*:*' }</code> for every facet, in order to guarantee that all shards
  * participate in all facets, so that the popularity &amp; relatedness values returned can be proven
  * with validation requests.
  * </p>
  * <p>
  * (Refinement alone is not enough. Using the '*:*' query as the facet domain is necessary to
  * prevent situations where a single shardX may return a candidate bucket with no child-buckets due to
  * the normal facet intersections, but when refined on other shardY(s), can produce "high scoring"
  * SKG child-buckets, which would then be missing the foreground/background "size" contributions from
  * shardX.)
  * </p>
  *
  * @see TestCloudJSONFacetJoinDomain
  * @see TestCloudJSONFacetSKGEquiv
  */
 @Slow
 public class TestCloudJSONFacetSKG extends SolrCloudTestCase {

   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

   /** Fully qualified name of this class; used as the prefix of the collection and config set names */
   private static final String DEBUG_LABEL = MethodHandles.lookup().lookupClass().getName();
   /** Name of the collection created (and set as the cloud client default) during class setup */
   private static final String COLLECTION_NAME = DEBUG_LABEL + "_collection";

   /** Mirror of {@link FacetField#DEFAULT_FACET_LIMIT} */
   private static final int DEFAULT_LIMIT = FacetField.DEFAULT_FACET_LIMIT;
   /** Exclusive upper bound on the field numbers used when building field names and documents */
   private static final int MAX_FIELD_NUM = 15;
   /** Number of distinct values per field; assigned in the BeforeClass method (50 for nightly runs, otherwise 20) */
   private static int UNIQUE_FIELD_VALS;

   /** Multivalued string field suffixes that can be randomized for testing diff facet/join code paths */
   private static final String[] MULTI_STR_FIELD_SUFFIXES = new String[]
     { "_multi_ss", "_multi_sds", "_multi_sdsS" };
   /** Multivalued int field suffixes that can be randomized for testing diff facet/join code paths */
   private static final String[] MULTI_INT_FIELD_SUFFIXES = new String[]
     { "_multi_is", "_multi_ids", "_multi_idsS" };

   /** Single Valued string field suffixes that can be randomized for testing diff facet code paths */
   private static final String[] SOLO_STR_FIELD_SUFFIXES = new String[]
     { "_solo_s", "_solo_sd", "_solo_sdS" };
   /** Single Valued int field suffixes that can be randomized for testing diff facet code paths */
   private static final String[] SOLO_INT_FIELD_SUFFIXES = new String[]
     { "_solo_i", "_solo_id", "_solo_idS" };

   /** A basic client for operations at the cloud level, default collection will be set */
   private static CloudHttp2SolrClient CLOUD_CLIENT;
   /** One client per node; populated in the BeforeClass method and closed in the AfterClass method */
   private static final ArrayList<Http2SolrClient> CLIENTS = new ArrayList<>(5);

   /**
    * One time setup: picks {@link #UNIQUE_FIELD_VALS}, sanity checks the test constants, starts a
    * cluster with a randomized shard/replica layout, creates the test collection, builds one client
    * per jetty node, and indexes a random number of documents where every field (multi &amp; single
    * valued, string &amp; int) has at least one random value.
    * <p>
    * NOTE: the index content is driven by the order of the <code>random()</code> calls below, so
    * reordering statements changes what a given seed produces.
    * </p>
    */
   @BeforeClass
   private static void createMiniSolrCloudCluster() throws Exception {

     // nightly runs use a larger pool of unique values per field
     if (TEST_NIGHTLY) {
       UNIQUE_FIELD_VALS = 50;
     } else {
       UNIQUE_FIELD_VALS = 20;
     }

     // sanity check constants
     assertTrue("bad test constants: some suffixes will never be tested",
                (MULTI_STR_FIELD_SUFFIXES.length < MAX_FIELD_NUM) &&
                (MULTI_INT_FIELD_SUFFIXES.length < MAX_FIELD_NUM) &&
                (SOLO_STR_FIELD_SUFFIXES.length < MAX_FIELD_NUM) &&
                (SOLO_INT_FIELD_SUFFIXES.length < MAX_FIELD_NUM));

     // we need DVs on point fields to compute stats & facets
     if (Boolean.getBoolean(SolrTestCaseJ4.NUMERIC_POINTS_SYSPROP)) System.setProperty(SolrTestCaseJ4.NUMERIC_DOCVALUES_SYSPROP,"true");

     // multi replicas should not matter...
     final int repFactor = LuceneTestCase.usually() ? 1 : 2;
     // ... but we definitely want to test multiple shards
     final int numShards = TEST_NIGHTLY ? TestUtil.nextInt(random(), 1, (LuceneTestCase.usually() ? 2 :3)) : 2;
     // one node per replica of each shard
     final int numNodes = (numShards * repFactor);

     final String configName = DEBUG_LABEL + "_config-set";
     final Path configDir = Paths.get(SolrTestUtil.TEST_HOME(), "collection1", "conf");

     configureCluster(numNodes).addConfig(configName, configDir).configure();

     Map<String, String> collectionProperties = new LinkedHashMap<>();
     collectionProperties.put("config", "solrconfig-tlog.xml");
     collectionProperties.put("schema", "schema_latest.xml");
     CollectionAdminRequest.createCollection(COLLECTION_NAME, configName, numShards, repFactor)
         .setProperties(collectionProperties)
         .process(cluster.getSolrClient());

     // the cluster's own client is reused (not closed by this test), with our collection as the default
     CLOUD_CLIENT = cluster.getSolrClient();
     CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME);

     // one direct client per jetty node, pointed at the test collection
     for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
       CLIENTS.add(SolrTestCaseJ4
           .getHttpSolrClient(jetty.getBaseUrl() + "/" + COLLECTION_NAME + "/"));
     }

     final int numDocs = LuceneTestCase.atLeast(TEST_NIGHTLY ? 97 : 7) + 3;
     for (int id = 0; id < numDocs; id++) {
       SolrInputDocument doc = SolrTestCaseJ4.sdoc("id", ""+id);
       for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
         // NOTE: we ensure every doc has at least one value in each field
         // that way, if a term is returned for a parent there is guaranteed to be at least
         // one term in the child facet as well.
         //
         // otherwise, we'd face the risk of a single shardX returning parentTermX as a top term for
         // the parent facet, but having no child terms -- meanwhile on refinement another shardY that
         // did *not* return parentTermX in phase#1, could return some *new* child terms under
         // parentTermX, but their stats would not include the bgCount from shardX.
         //
         // in normal operation, this is an edge case that isn't a big deal because the ratios &
         // relatedness scores are statistically approximate, but for the purpose of this test where
         // we verify correctness via exactness we need all shards to contribute to the SKG statistics
         final int numValsThisDoc = TestUtil.nextInt(random(), 1, (LuceneTestCase.usually() ? 5 : 10));
         for (int v = 0; v < numValsThisDoc; v++) {
           final String fieldValue = randFieldValue(fieldNum);

           // multi valued: one string, and one integer
           doc.addField(multiStrField(fieldNum), fieldValue);
           doc.addField(multiIntField(fieldNum), fieldValue);
         }
         { // single valued: one string, and one integer
           final String fieldValue = randFieldValue(fieldNum);
           doc.addField(soloStrField(fieldNum), fieldValue);
           doc.addField(soloIntField(fieldNum), fieldValue);
         }
       }
       CLOUD_CLIENT.add(doc);
       if (random().nextInt(100) < 1) {
         CLOUD_CLIENT.commit();  // commit 1% of the time to create new segments
       }
       if (random().nextInt(100) < (TEST_NIGHTLY ? 5 : 1)) {
         CLOUD_CLIENT.add(doc);  // duplicate the doc 5% (nightly) or 1% of the time to create deleted docs
       }
     }
     // final commit so that everything indexed above is searchable
     CLOUD_CLIENT.commit();
   }

   /**
    * Builds the name of the field identified by <code>fieldNum</code>.  The suffix is picked from
    * <code>suffixes</code> by taking <code>fieldNum</code> modulo the array length, so the same
    * number &amp; suffix array always produce the same field name.
    *
    * @param suffixes the candidate suffixes for the kind of field wanted
    * @param fieldNum the field number, must be less than {@link #MAX_FIELD_NUM}
    * @return <code>"field_" + fieldNum + suffix</code>
    * @see #MULTI_STR_FIELD_SUFFIXES
    * @see #MULTI_INT_FIELD_SUFFIXES
    * @see #MAX_FIELD_NUM
    * @see #randFieldValue
    */
   private static String field(final String[] suffixes, final int fieldNum) {
     assert fieldNum < MAX_FIELD_NUM;

     final int suffixIdx = fieldNum % suffixes.length;
     return new StringBuilder("field_")
       .append(fieldNum)
       .append(suffixes[suffixIdx])
       .toString();
   }
   /**
    * Maps a field number to the name of its multi valued string field.
    *
    * @see #field
    * @see #MULTI_STR_FIELD_SUFFIXES
    */
   private static String multiStrField(final int fieldNum) {
     final String name = field(MULTI_STR_FIELD_SUFFIXES, fieldNum);
     return name;
   }
   /**
    * Maps a field number to the name of its multi valued int field.
    *
    * @see #field
    * @see #MULTI_INT_FIELD_SUFFIXES
    */
   private static String multiIntField(final int fieldNum) {
     final String name = field(MULTI_INT_FIELD_SUFFIXES, fieldNum);
     return name;
   }
   /**
    * Maps a field number to the name of its single valued string field.
    *
    * @see #field
    * @see #SOLO_STR_FIELD_SUFFIXES
    */
   private static String soloStrField(final int fieldNum) {
     final String name = field(SOLO_STR_FIELD_SUFFIXES, fieldNum);
     return name;
   }
   /**
    * Maps a field number to the name of its single valued int field.
    *
    * @see #field
    * @see #SOLO_INT_FIELD_SUFFIXES
    */
   private static String soloIntField(final int fieldNum) {
     final String name = field(SOLO_INT_FIELD_SUFFIXES, fieldNum);
     return name;
   }

   /**
    * Produces a random (integer based) value for the field with the given number.
    * <p>
    * The value is <code>fieldNum</code> plus a random offset between 1 and {@link #UNIQUE_FIELD_VALS}
    * (inclusive), so every field has the same number of unique values, but the <em>range</em> of
    * those values shifts with the field number -- cross field joins match fewer documents the
    * further apart the field numbers are.
    * </p>
    *
    * @see #UNIQUE_FIELD_VALS
    * @see #field
    */
   private static String randFieldValue(final int fieldNum) {
     final int offset = TestUtil.nextInt(random(), 1, UNIQUE_FIELD_VALS);
     return Integer.toString(fieldNum + offset);
   }


   /**
    * Releases the clients created during class setup.
    * <p>
    * Every per-node client is closed even if closing an earlier one fails: the first failure is
    * remembered (later ones are attached as suppressed exceptions) and rethrown only after all
    * clients have been attempted and {@link #CLIENTS} has been cleared.
    * </p>
    *
    * @throws Exception the first failure encountered while closing a per-node client
    */
   @AfterClass
   private static void afterClass() throws Exception {
     // CLOUD_CLIENT is not ours to close (it belongs to the cluster), just drop the reference
     CLOUD_CLIENT = null;

     Exception firstFailure = null;
     try {
       for (Http2SolrClient client : CLIENTS) {
         try {
           client.close();
         } catch (Exception e) {
           // keep going so the remaining clients still get closed
           if (null == firstFailure) {
             firstFailure = e;
           } else {
             firstFailure.addSuppressed(e);
           }
         }
       }
     } finally {
       // static list: always reset it so no stale clients leak past this class
       CLIENTS.clear();
     }
     if (null != firstFailure) {
       throw firstFailure;
     }
   }

   /**
    * Runs a handful of small, hand crafted (but non-trivial) facet requests that are much easier to
    * trace/debug than the purely random requests of testRandom.
    * (ie: if something obvious gets broken, this test may fail faster and in a more obvious way)
    * <p>
    * Every request facets on the same multi valued string field and uses the same
    * query / foreground / background; only the facet options differ.
    * </p>
    */
   public void testBespoke() throws Exception {
     // TODO: known bad seeds -- the whole test is skipped when run with any of them
     final String seed = System.getProperty("tests.seed");
     for (String badSeed : new String[] { "E5A14A8ED3385FF9", "226E21DD909C0468", "7437716F4AD8DD12" }) {
       LuceneTestCase.assumeFalse("TODO: Bad Seed", badSeed.equals(seed));
     }

     final String facetField = multiStrField(9);
     final String mainQ = multiStrField(7) + ":11";
     final String foreQ = multiStrField(5) + ":9";
     final String backQ = "*:*";
     final String nothingChecked = "Didn't check a single bucket???";

     { // trivial single level facet
       final Map<String,TermFacet> facets = new LinkedHashMap<>();
       facets.put("top1", new TermFacet(facetField, UNIQUE_FIELD_VALS, 0, null));
       final AtomicInteger budget = new AtomicInteger(UNIQUE_FIELD_VALS);
       assertFacetSKGsAreCorrect(budget, facets, mainQ, foreQ, backQ);
       assertTrue(nothingChecked, budget.get() < UNIQUE_FIELD_VALS);
     }

     { // trivial single level facet w/sorting on skg
       final Map<String,TermFacet> facets = new LinkedHashMap<>();
       facets.put("top2", new TermFacet(facetField, UNIQUE_FIELD_VALS, 0, "skg desc"));
       final AtomicInteger budget = new AtomicInteger(UNIQUE_FIELD_VALS);
       assertFacetSKGsAreCorrect(budget, facets, mainQ, foreQ, backQ);
       assertTrue(nothingChecked, budget.get() < UNIQUE_FIELD_VALS);
     }

     // trivial single level facet w/ 2 diff ways to request "limit = (effectively) Infinite"
     // to sanity check refinement of buckets missing from other shard in both cases
     //
     // NOTE that these two queries & facets *should* be effectively identical given that the
     // very large limit value is big enough that no shard will ever return that many terms,
     // but the "limit=-1" case actually triggers slightly different code paths
     // because it causes FacetField.returnsPartial() to be "true"
     for (int limit : new int[] { 999999999, -1 }) {
       final Map<String,TermFacet> facets = new LinkedHashMap<>();
       facets.put("top_facet_limit__" + limit, new TermFacet(facetField, limit, 0, "skg desc"));
       final AtomicInteger budget = new AtomicInteger(UNIQUE_FIELD_VALS);
       assertFacetSKGsAreCorrect(budget, facets, mainQ, foreQ, backQ);
       assertTrue(nothingChecked, budget.get() < UNIQUE_FIELD_VALS);
     }

     // allBuckets should have no impact...
     for (Boolean allBuckets : Arrays.asList( null, false, true )) {
       final Map<String,Object> options = map("allBuckets", allBuckets,
                                              "sort", "skg desc");
       final Map<String,TermFacet> facets = new LinkedHashMap<>();
       facets.put("allb__" + allBuckets, new TermFacet(facetField, options));
       final AtomicInteger budget = new AtomicInteger(UNIQUE_FIELD_VALS);
       assertFacetSKGsAreCorrect(budget, facets, mainQ, foreQ, backQ);
       assertTrue(nothingChecked, budget.get() < UNIQUE_FIELD_VALS);
     }
   }

   /**
    * Verifies randomly generated (possibly nested) facets against randomly generated
    * query / foreground / background strings, until either the iteration count or the
    * bucket verification budget is exhausted.
    */
   @LuceneTestCase.Nightly
   public void testRandom() throws Exception {

     // verifying the stats of a single bucket is expensive (see TODO in verifySKGResults()), so a
     // budget caps the total number of buckets verified across *all* of the requests below.
     // if the random requests all have relatively small facets, so be it; if a really big one
     // shows up early, as much of it as possible is tested and the remaining iterations are skipped.
     //
     // (deeply nested facets may contain more buckets than the budget, but not all get *checked*)
     final int bucketBudget = LuceneTestCase.atLeast(TEST_NIGHTLY ? 2000 : 200);
     final AtomicInteger bucketsLeft = new AtomicInteger(bucketBudget);

     final int numIters = LuceneTestCase.atLeast(TEST_NIGHTLY ? 9 : 4) + 1;
     int iter = 0;
     while (iter < numIters && 0 < bucketsLeft.get()) {
       // NOTE: generated in this exact order so a given seed keeps producing the same requests
       final Map<String,TermFacet> facets = TermFacet.buildRandomFacets();
       final String query = buildRandomQuery();
       final String foreQ = buildRandomQuery();
       final String backQ = buildRandomQuery();

       assertFacetSKGsAreCorrect(bucketsLeft, facets, query, foreQ, backQ);
       iter++;
     }
     assertTrue("Didn't check a single bucket???", bucketsLeft.get() < bucketBudget);
   }

   /**
    * Builds a random query string over the randomized fields/values in the index: occasionally
    * the match-all query, otherwise a disjunction of 3 to 10 <code>field:value</code> clauses
    * on the multi valued string fields.
    *
    * @see #randFieldValue
    * @see #field
    */
   private static String buildRandomQuery() {
     final boolean matchAll = (0 == TestUtil.nextInt(random(), 0,10));
     if (matchAll) {
       return "*:*";
     }
     final String[] clauses = new String[TestUtil.nextInt(random(), 3, 10)];
     for (int i = 0; i < clauses.length; i++) {
       final int fieldNum = random().nextInt(MAX_FIELD_NUM);
       // keep queries simple, just use str fields - not point of test
       final String fieldName = multiStrField(fieldNum);
       final String fieldValue = randFieldValue(fieldNum);
       clauses[i] = fieldName + ":" + fieldValue;
     }
     return buildORQuery(clauses);
   }

   /**
    * Joins the given clauses with <code>" OR "</code> and wraps the result in parentheses.
    *
    * @param clauses the individual query clauses, at least one is expected
    * @return the parenthesized disjunction, ie: <code>(a OR b OR c)</code>
    */
   private static String buildORQuery(String... clauses) {
     assert 0 < clauses.length;
     final StringBuilder query = new StringBuilder("(");
     for (int i = 0; i < clauses.length; i++) {
       if (0 < i) {
         query.append(" OR ");
       }
       query.append(clauses[i]);
     }
     return query.append(")").toString();
   }

   /**
    * Executes <code>query</code> with the given term facets (using <code>foreQ</code> and
    * <code>backQ</code> as the <code>fore</code> / <code>back</code> params), then asserts that the
    * SKG stats of every facet term returned match the results of the equivalent queries executed
    * in isolation.
    * <p>
    * A failure to execute the initial request is rethrown as a RuntimeException naming the request;
    * assertion failures during verification are rethrown with the request &amp; response prepended.
    * </p>
    *
    * @see #verifySKGResults
    */
   private void assertFacetSKGsAreCorrect(final AtomicInteger maxBucketsToCheck,
                                          Map<String,TermFacet> expected,
                                          final String query,
                                          final String foreQ,
                                          final String backQ) throws SolrServerException, IOException {
     final SolrParams baseParams = params("rows","0", "fore", foreQ, "back", backQ);
     final String jsonFacet = String.valueOf(TermFacet.toJSONFacetParamValue(expected));
     final SolrParams initParams = SolrParams.wrapAppended(params("q", query,
                                                                  "json.facet", jsonFacet),
                                                           baseParams);

     log.info("Doing full run: {}", initParams);

     QueryResponse rsp;
     // JSON Facets not (currently) available from QueryResponse...
     NamedList topNamedList;
     try {
       final QueryRequest request = new QueryRequest(initParams);
       rsp = request.process(getRandClient(random()));
       assertNotNull(initParams + " is null rsp?", rsp);
       topNamedList = rsp.getResponse();
       assertNotNull(initParams + " is null topNamedList?", topNamedList);
     } catch (Exception e) {
       throw new RuntimeException("init query failed: " + initParams + ": " +
                                  e.getMessage(), e);
     }

     try {
       final NamedList facetResponse = (NamedList) topNamedList.get("facets");
       assertNotNull("null facet results?", facetResponse);

       final long numFound = rsp.getResults().getNumFound();
       final long topCount = ((Number)facetResponse.get("count")).longValue();
       assertEquals("numFound mismatch with top count?", numFound, topCount);

       // Note: even if the query has numFound=0, our explicit background query domain should
       // still force facet results
       // (even if the background query matches nothing, that just means there will be no
       // buckets in those facets)
       assertFacetSKGsAreCorrect(maxBucketsToCheck, expected, baseParams, facetResponse);

     } catch (AssertionError e) {
       // add the request & full response for context
       throw new AssertionError(initParams + " ===> " + topNamedList + " --> " + e.getMessage(), e);
     } finally {
       log.info("Ending full run");
     }
   }

   /**
    * Recursive helper that walks the actual facet response depth first, comparing the SKG results
    * of each bucket to the expected values derived from the equivalent filters of the original
    * TermFacet.
    * <p>
    * Returns immediately (skipping all remaining checks) once <code>maxBucketsToCheck</code>
    * has been decremented to zero or less.
    * </p>
    */
   private void assertFacetSKGsAreCorrect(final AtomicInteger maxBucketsToCheck,
                                          final Map<String,TermFacet> expected,
                                          final SolrParams baseParams,
                                          final NamedList actualFacetResponse) throws SolrServerException, IOException {

     for (Map.Entry<String,TermFacet> expectedEntry : expected.entrySet()) {
       final String key = expectedEntry.getKey();
       final TermFacet termFacet = expectedEntry.getValue();

       final NamedList facetResult = (NamedList) actualFacetResponse.get(key);
       assertNotNull(key + " key missing from: " + actualFacetResponse, facetResult);

       final Object allBuckets = facetResult.get("allBuckets");
       if (null != allBuckets) {
         // when the response includes an allBuckets bucket, it must not contain any 'skg' key
         assertEquals(key + " has skg in allBuckets: " + allBuckets,
                      Collections.emptyList(),
                      ((NamedList)allBuckets).getAll("skg"));
       }

       final List<NamedList> bucketList = (List<NamedList>) facetResult.get("buckets");
       assertNotNull(key + " has null buckets: " + actualFacetResponse, bucketList);

       if (bucketList.isEmpty()) {
         // should only happen if the background query does not match any docs with field X
         final SolrParams docsWithFieldParams = params("_trace", "noBuckets",
                                                       "rows", "0",
                                                       "q", termFacet.field+":[* TO *]",
                                                       "fq", baseParams.get("back"));
         final long docsWithField = getNumFound(docsWithFieldParams);

         assertEquals(key + " has no buckets, but docs in background exist with field: " + termFacet.field,
                      0, docsWithField);
       }

       // NOTE: going depth first matters -- not just because it is the easiest approach, but because
       // it means the maxBucketsToCheck budget gets spent on a lot of deep sub-bucket checking,
       // not just on all the buckets of the top level(s) facet(s)
       for (NamedList bucket : bucketList) {
         final String fieldVal = bucket.get("val").toString(); // int or stringified int

         verifySKGResults(key, termFacet, baseParams, fieldVal, bucket);
         if (maxBucketsToCheck.decrementAndGet() <= 0) {
           return;
         }

         final SolrParams verifyParams = SolrParams.wrapAppended(baseParams,
                                                                 params("fq", termFacet.field + ":" + fieldVal));

         if (termFacet.subFacets.isEmpty()) {
           continue;
         }
         // recursively check subFacets, filtered by this bucket
         assertFacetSKGsAreCorrect(maxBucketsToCheck, termFacet.subFacets, verifyParams, bucket);
       }
     }

     // make sure we don't have any facet keys we don't expect
     // a little hackish because subfacets have extra keys...
     final LinkedHashSet<String> expectedKeys = new LinkedHashSet<>(expected.keySet());
     expectedKeys.add("count");
     final boolean isBucket = (actualFacetResponse.indexOf("val",0) >= 0);
     if (isBucket) {
       expectedKeys.add("val");
       expectedKeys.add("skg");
     }
     assertEquals("Unexpected keys in facet response",
                  expectedKeys, actualFacetResponse.asShallowMap().keySet());
   }

   /**
    * Verifies that the popularity &amp; relatedness values contained in a single SKG bucket
    * match the values computed from four independent count queries (foreground size, background
    * size, foreground count, background count) built from the facet field &amp; bucket value,
    * as well as the existing filterParams.
    *
    * @see #assertFacetSKGsAreCorrect
    */
   private void verifySKGResults(String facetKey, TermFacet facet, SolrParams filterParams,
                                 String fieldVal, NamedList<Object> bucket)
     throws SolrServerException, IOException {

     final String label = facetKey + "/bucket:" + bucket;
     final String bucketQ = facet.field+":"+fieldVal;
     final NamedList<Object> skgBucket = (NamedList<Object>) bucket.get("skg");
     assertNotNull(label, skgBucket);

     // TODO: make this more efficient?
     // ideally we'd do a single query w/4 facet.queries, one for each count
     // but formatting the queries is a pain, currently we leverage the accumulated fq's
     final SolrParams fgSizeParams = SolrParams.wrapAppended(params("_trace", "fgSize",
                                                                    "rows","0",
                                                                    "q","{!query v=$fore}"),
                                                             filterParams);
     final long fgSize = getNumFound(fgSizeParams);

     final SolrParams bgSizeParams = params("_trace", "bgSize",
                                            "rows","0",
                                            "q", filterParams.get("back"));
     final long bgSize = getNumFound(bgSizeParams);

     final SolrParams fgCountParams = SolrParams.wrapAppended(params("_trace", "fgCount",
                                                                     "rows","0",
                                                                     "q","{!query v=$fore}",
                                                                     "fq", bucketQ),
                                                              filterParams);
     final long fgCount = getNumFound(fgCountParams);

     final SolrParams bgCountParams = params("_trace", "bgCount",
                                             "rows","0",
                                             "q", bucketQ,
                                             "fq", filterParams.get("back"));
     final long bgCount = getNumFound(bgCountParams);

     // NOTE: both popularities are relative to the *background* size
     assertEquals(label + " => fgPop should be: " + fgCount + " / " + bgSize,
                  roundTo5Digits((double) fgCount / bgSize),
                  skgBucket.get("foreground_popularity"));
     assertEquals(label + " => bgPop should be: " + bgCount + " / " + bgSize,
                  roundTo5Digits((double) bgCount / bgSize),
                  skgBucket.get("background_popularity"));
     assertEquals(label + " => relatedness is wrong",
                  roundTo5Digits(computeRelatedness(fgCount, fgSize, bgCount, bgSize)),
                  skgBucket.get("relatedness"));
   }

   /**
    * Trivial data structure for modeling a simple terms facet that can be written out as a json.facet param.
    *
    * Doesn't do any string escaping or quoting, so don't use whitespace or reserved json characters
    */
   private static final class TermFacet implements Writable {

     /** non-skg subfacets for use in verification; keyed by facet name, written out as nested facets by {@link #write} */
     public final Map<String,TermFacet> subFacets = new LinkedHashMap<>();

     /** the options of this facet (including type, field, refine &amp; domain) exactly as they are written out by {@link #write} */
     private final Map<String,Object> jsonData = new LinkedHashMap<>();

     /** name of the field faceted on, never null */
     public final String field;
     /**
      * @param field must be non null
      * @param options can set any of options used in a term facet other then field or (sub) facets
      */
     public TermFacet(final String field, final Map<String,Object> options) {
       assert null != field;
       this.field = field;

       jsonData.putAll(options);

       // we don't allow these to be overridden by options, so set them now...
       jsonData.put("type", "terms");
       jsonData.put("field", field);
       // see class javadocs for why we always use refine:true & the query:'*:*' domain for this test.
       jsonData.put("refine", true);
       jsonData.put("domain", map("query","*:*"));

     }

     /**
      * Convenience constructor that passes <code>limit</code>, <code>overrequest</code> and
      * <code>sort</code> as the options of the (field, options) constructor.
      * All params except field can be null.
      * NOTE(review): how null option values end up being represented depends on the
      * <code>map(...)</code> helper, which is not defined in this class -- confirm there.
      *
      * @param field must be non null
      * @param limit value for the <code>limit</code> option, may be null
      * @param overrequest value for the <code>overrequest</code> option, may be null
      * @param sort value for the <code>sort</code> option, may be null
      */
     public TermFacet(String field, Integer limit, Integer overrequest, String sort) {
       this(field, map("limit", limit, "overrequest", overrequest, "sort", sort));
     }

     /**
      * Simplified constructor asks for limit = # unique vals, with an overrequest of 0 and
      * sorting on <code>skg desc</code>
      *
      * @param field must be non null
      */
     public TermFacet(String field) {
       this(field, UNIQUE_FIELD_VALS, 0, "skg desc");

     }
     /**
      * Writes this facet as a JSON object: a <code>facet</code> entry (holding
      * <code>processEmpty</code>, the <code>skg</code> stat and all of the sub facets),
      * followed by all of the options of this facet.
      */
     @Override
     public void write(JSONWriter writer) {
       // the nested "facet" block needs our "real" subfacets, plus the SKG stat and 'processEmpty'
       // (these two are deliberately not kept in 'subFacets', to keep the verification code simpler)
       final Map<String,Object> nested = map("processEmpty", true,
                                             "skg", "relatedness($fore,$back)");
       nested.putAll(this.subFacets);

       final Map<String,Object> json = map("facet", nested);
       json.putAll(this.jsonData);

       writer.write(json);
     }

     /**
      * Converts a (non empty) set of possibly nested facets into a single line
      * <code>json.facet</code> param value suitable for use in a solr request.
      *
      * @param facets the top level facets, keyed by facet name; must be non null and non empty
      * @return the JSON string, containing no newlines
      */
     public static String toJSONFacetParamValue(final Map<String,TermFacet> facets) {
       assert null != facets && ! facets.isEmpty();

       // see class javadocs for why we always want processEmpty
       final Map<String,Object> topLevel = map("processEmpty", true);
       topLevel.putAll(facets);

       final int noNewlines = -1;
       return JSONUtil.toJSON(topLevel, noNewlines);
     }

     /**
      * Factory method for generating some random facets, nested up to a randomly chosen maximum depth.
      *
      * For simplicity, each facet will have a unique key name.
      */
     public static Map<String,TermFacet> buildRandomFacets() {
       final int maxDepth = TestUtil.nextInt(random(), 0, (LuceneTestCase.usually() ? 2 : 3));

       // a single counter shared by all depths provides a unique facet key regardless of depth:
       // that simplifies verification and lets us enforce a hard limit on the total number of
       // facets in a request
       return buildRandomFacets(new AtomicInteger(0), maxDepth);
     }

     /**
      * Picks a random field to facet on: first a random field number, then one of the four kinds
      * of field (multi/single valued, string/int) with equal probability.
      *
      * @see #field
      * @return field name, never null
      */
     public static String randomFacetField(final Random r) {
       final int fieldNum = r.nextInt(MAX_FIELD_NUM);
       final int kind = r.nextInt(4);
       if (0 == kind) {
         return multiStrField(fieldNum);
       }
       if (1 == kind) {
         return multiIntField(fieldNum);
       }
       if (2 == kind) {
         return soloStrField(fieldNum);
       }
       if (3 == kind) {
         return soloIntField(fieldNum);
       }
       throw new RuntimeException("Broken case statement");
     }

     /**
      * Picks a random value for the "perSeg" param: true or false a quarter of the time each,
      * otherwise null.
      *
      * @return a Boolean, may be null
      */
     public static Boolean randomPerSegParam(final Random r) {
       final int pick = r.nextInt(4);
       if (0 == pick) {
         return Boolean.TRUE;
       }
       if (1 == pick) {
         return Boolean.FALSE;
       }
       if (2 == pick || 3 == pick) {
         return null;
       }
       throw new RuntimeException("Broken case statement");
     }

     /**
      * Picks a random value for the "prefix" param: always null for numeric fields (without
      * consuming any randomness), otherwise <code>"2"</code> or <code>"3"</code> a fifth of the
      * time each, and null the rest of the time.
      *
      * @return a valid prefix value, may be null
      */
     public static String randomPrefixParam(final Random r, final String facetField) {
       final boolean numericField = facetField.contains("multi_i") || facetField.contains("solo_i");
       if (numericField) {
         // never use a prefix on a numeric field
         return null;
       }
       assert (facetField.contains("multi_s") || facetField.contains("solo_s"))
         : "possible facet fields have changed, breaking test";

       final int pick = r.nextInt(5);
       if (0 == pick) {
         return "2";
       }
       if (1 == pick) {
         return "3";
       }
       if (2 <= pick && pick <= 4) {
         return null;
       }
       throw new RuntimeException("Broken case statement");
     }

     /**
      * picks a random value for the "prelim_sort" param, biased in favor of interesting test cases.
      *
      * @return a sort string (w/direction), or null to specify nothing (trigger default behavior)
      * @see #randomSortParam
      */
     public static String randomPrelimSortParam(final Random r, final String sort) {
       // a prelim_sort is only ever produced when the (final) sort is on skg;
       // in every other case nothing is drawn from 'r'
       if (null == sort || !sort.startsWith("skg")) {
         return null;
       }
       // 1-in-4 chance of "count desc".  The value is drawn from the caller supplied
       // Random (rather than the global test random()) so that the result is fully
       // determined by 'r', consistent with the other randomXxxParam(r) helpers.
       return (1 == r.nextInt(4)) ? "count desc" : null;
     }
     /**
      * picks a random value for the "sort" param, biased in favor of interesting test cases
      *
      * @return a sort string (w/direction), or null to specify nothing (trigger default behavior)
      * @see #randomLimitParam
      * @see #randomAllBucketsParam
      * @see #randomPrelimSortParam
      */
     public static String randomSortParam(Random r) {

       // IMPORTANT!!!
       // if this method is modified to produce new sorts, make sure to update
       // randomLimitParam to account for them if they are impacted by SOLR-12556

       // Both the direction and the sort type are drawn from the caller supplied
       // Random (never the global test random()) so that the result is fully
       // determined by 'r'.  The direction is always drawn first, even when the
       // result ends up being null.
       final String dir = r.nextBoolean() ? "asc" : "desc";
       switch (r.nextInt(4)) {
         case 0: return null; // specify nothing, trigger default behavior
         case 1: return "count " + dir;
         case 2: return "skg " + dir;
         case 3: return "index " + dir;
         default: throw new RuntimeException("Broken case statement");
       }
     }
     /**
      * picks a random value for the "limit" param, biased in favor of interesting test cases
      *
      * <p>
      * <b>NOTE:</b> Due to SOLR-12556, we have to force an overrequest of "all" possible terms for
      * some sort values.
      * </p>
      *
      * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
      * @see #UNIQUE_FIELD_VALS
      * @see #randomSortParam
      */
     public static Integer randomLimitParam(Random r, final String sort) {
       // of the known types of sorts produced, these are at risk of SOLR-12556
       final boolean atRisk = null != sort && (sort.equals("count asc") || sort.startsWith("skg"));
       if (atRisk) {
         // so request (effectively) unlimited num buckets
         if (r.nextBoolean()) {
           return UNIQUE_FIELD_VALS;
         }
         return -1;
       }

       final int candidate = r.nextInt((int) (UNIQUE_FIELD_VALS * 1.5F)) + 1;
       if (candidate >= UNIQUE_FIELD_VALS && r.nextBoolean()) {
         // unlimited
         return -1;
       }
       if (candidate == DEFAULT_LIMIT && r.nextBoolean()) {
         // sometimes, don't specify limit if it's the default
         return null;
       }
       return candidate;
     }

     /**
      * picks a random value for the "overrequest" param, biased in favor of interesting test cases.
      *
      * @return a number to specify in the request, or null to specify nothing (trigger default behavior)
      * @see #UNIQUE_FIELD_VALS
      */
     public static Integer randomOverrequestParam(Random r) {
       final int roll = r.nextInt(10);
       if (0 <= roll && roll <= 3) {
         // 40% of the time, disable overrequest to better stress refinement
         return 0;
       }
       if (4 == roll || 5 == roll) {
         // 20%: ask for less than what's needed
         return r.nextInt(UNIQUE_FIELD_VALS);
       }
       if (6 == roll) {
         // 10%: completely random value, statistically more than enough
         return r.nextInt(Integer.MAX_VALUE);
       }
       // otherwise: either leave the param unspecified, or redundantly specify the -1 default
       if (r.nextBoolean()) {
         return null;
       }
       return -1;
     }

     /**
      * picks a random value for the "allBuckets" param, biased in favor of interesting test cases.
      * This bucket should be ignored by relatedness, but inclusion should not cause any problems
      * (or change the results)
      *
      * <p>
      * <b>NOTE:</b> allBuckets is meaningless in conjunction with the <code>STREAM</code> processor, so
      * this method always returns null if sort is <code>index asc</code>.
      * </p>
      *
      *
      * @return a Boolean, may be null
      * @see <a href="https://issues.apache.org/jira/browse/SOLR-14514">SOLR-14514: allBuckets ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String sort) {
       if ("index asc".equals(sort)) {
         // never specify allBuckets for this sort (and no random value is consumed)
         return null;
       }

       // a single draw decides the outcome: 0 => true, 1 => false, 2 or 3 => unspecified (null)
       final int roll = r.nextInt(4);
       if (0 == roll) {
         return Boolean.TRUE;
       }
       if (1 == roll) {
         return Boolean.FALSE;
       }
       if (2 == roll || 3 == roll) {
         return null;
       }
       // nextInt(4) should only ever produce 0..3
       throw new RuntimeException("Broken case statement");
     }

     /**
      * recursive helper method for building random facets
      *
      * @param keyCounter used to ensure every generated facet has a unique key name
      * @param maxDepth max possible depth allowed for the recursion, a lower value may be used depending on how many facets are returned at the current level.
      */
     private static Map<String,TermFacet> buildRandomFacets(AtomicInteger keyCounter, int maxDepth) {
       // 3/5th chance of being '1'
       final int width = Math.max(1, TestUtil.nextInt(random(), -1, 3));
       final Map<String,TermFacet> facets = new LinkedHashMap<>();
       for (int n = 0; n < width; n++) {
         if (3 <= keyCounter.get()) {
           // a hard limit on the total number of facets (regardless of depth) to reduce OOM risk
           continue;
         }

         // NOTE: the params below are drawn in exactly this order; changing the order
         // changes which facets a given test seed produces.
         final String sort = randomSortParam(random());
         final String facetField = randomFacetField(random());
         final String prelimSort = randomPrelimSortParam(random(), sort);
         final Integer limit = randomLimitParam(random(), sort);
         final Integer overrequest = randomOverrequestParam(random());
         final String prefix = randomPrefixParam(random(), facetField);
         final Boolean allBuckets = randomAllBucketsParam(random(), sort);
         final Boolean perSeg = randomPerSegParam(random());

         // map(...) skips any pair whose value is null, leaving that option unspecified
         final TermFacet facet = new TermFacet(facetField,
                                               map("sort", sort,
                                                   "prelim_sort", prelimSort,
                                                   "limit", limit,
                                                   "overrequest", overrequest,
                                                   "prefix", prefix,
                                                   "allBuckets", allBuckets,
                                                   "perSeg", perSeg));
         facets.put("facet_" + keyCounter.incrementAndGet(), facet);

         if (maxDepth <= 0) {
           // no depth budget left for sub facets
           continue;
         }
         // if we're going wide, don't go deep
         final int nextMaxDepth = Math.max(0, maxDepth - width);
         final int childDepth = TestUtil.nextInt(random(), 0, nextMaxDepth);
         facet.subFacets.putAll(buildRandomFacets(keyCounter, childDepth));
       }
       return facets;
     }
   }

   /**
    * returns a random SolrClient -- either a CloudSolrClient, or an HttpSolrClient pointed
    * at a node in our cluster
    */
   public static SolrClient getRandClient(Random rand) {
     final int nodeClientCount = CLIENTS.size();
     // the pick may be any valid index into CLIENTS, or one past the end --
     // that one extra slot selects the cloud client
     final int pick = TestUtil.nextInt(rand, 0, nodeClientCount);
     if (pick == nodeClientCount) {
       return CLOUD_CLIENT;
     }
     return CLIENTS.get(pick);
   }

   /**
    * Uses a random SolrClient to execute a request and returns only the numFound
    * @see #getRandClient
    */
   public static long getNumFound(final SolrParams req) throws SolrServerException, IOException {
     // a different (randomly chosen) client may be used on every call
     final SolrClient client = getRandClient(random());
     final QueryResponse rsp = client.query(req);
     return rsp.getResults().getNumFound();
   }

   /** helper macro: fails on null keys, skips pairs with null values  */
   public static Map<String,Object> map(Object... pairs) {
     // arguments are consumed as alternating (key, value) pairs
     if (0 != pairs.length % 2) throw new IllegalArgumentException("uneven number of arguments");
     final Map<String,Object> map = new LinkedHashMap<>(); // preserves the order the pairs were given in
     for (int i = 0; i < pairs.length; i+=2) {
       final Object key = pairs[i];
       final Object val = pairs[i+1];
       // a null key is always an error, even when its value is also null;
       // the message reports the (0 based) position of the offending argument
       if (null == key) throw new NullPointerException("argument " + i);
       // a null value means "not specified": the pair is silently dropped
       if (null == val) continue;

       // keys are stored by their toString(); a later duplicate key replaces the earlier value
       map.put(key.toString(), val);
     }
     return map;
   }

 }