solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java - solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.search.facet;

 import java.util.Arrays;
 import java.util.List;
 import org.apache.solr.BaseDistributedSearchTestCase;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.junit.Test;

 /**
  * A test that demonstrates some expected behavior for "long tail" terms when using <code>
  * refine:simple</code>
  *
  * <p><b>NOTE:</b> This test ignores the control collection (in single node mode, there is no need
  * for the overrequesting, all the data is local -- so comparisons with it wouldn't be valid in the
  * cases we are testing here)
  *
  * <p><b>NOTE:</b> This test is heavily inspired by (and uses the same indexed documents as) {@link
  * org.apache.solr.handler.component.DistributedFacetPivotLongTailTest} -- however the behavior of
  * <code>refine:simple</code> is "simpler" than the refinement logic used by <code>facet.pivot
  * </code> so the assertions in this test vary from that test.
  */
 public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {

   /**
    * Names of the aggregation functions requested in every stats-bearing facet of this test. The
    * constructor turns each name into one {@code name:'name(field)',} entry of {@link
    * #ALL_STATS_JSON}. Declared {@code final} because the list is a constant and is never
    * reassigned.
    */
   private static final List<String> ALL_STATS =
       Arrays.asList(
           "min",
           "max",
           "sum",
           "stddev",
           "avg",
           "sumsq",
           "unique",
           "missing",
           "countvals",
           "percentile",
           "variance",
           "hll");

   /** Field the stats are computed over; the constructor picks "stat_is" or "stat_i" at random. */
   private final String STAT_FIELD;

   /**
    * Comma-terminated JSON facet entries, one per element of {@link #ALL_STATS}. Starts out empty,
    * is filled in by the constructor, and is spliced into the {@code json.facet} requests.
    */
   private String ALL_STATS_JSON = "";

   public DistributedFacetSimpleRefinementLongTailTest() {
     // we need DVs on point fields to compute stats & facets
     if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP))
       System.setProperty(NUMERIC_DOCVALUES_SYSPROP, "true");

     STAT_FIELD = random().nextBoolean() ? "stat_is" : "stat_i";

     for (String stat : ALL_STATS) {
       String val = stat.equals("percentile") ? STAT_FIELD + ",90" : STAT_FIELD;
       ALL_STATS_JSON += stat + ":'" + stat + "(" + val + ")',";
     }
   }

   /**
    * Single entry point of the test: indexes the fixture documents, commits, and then runs the
    * three groups of checks in order (per-shard sanity checks, refinement/overrequest checks, and
    * sub-facet stat checks).
    */
   @Test
   @ShardsFixed(num = 3)
   public void test() throws Exception {
     // buildIndexes asserts that it is handed exactly 3 clients
     buildIndexes(clients, STAT_FIELD);
     commit();

     // the later checks rely on the per-shard bucket layout verified by the first one
     sanityCheckIndividualShards();
     checkRefinementAndOverrequesting();
     checkSubFacetStats();
   }

   /**
    * Populates the three shards with the "long tail" document set that the assertions of this test
    * are written against.
    *
    * <p>Ids come from a single counter that is incremented once per added document, in the order
    * the documents are added.
    *
    * @param clients one client per shard; must contain exactly 3 entries
    * @param statField name of the field that receives the per-document stat value
    * @throws Exception if adding a document to a shard fails
    */
   public static void buildIndexes(final List<SolrClient> clients, final String statField)
       throws Exception {

     assertEquals("This indexing code assumes exactly 3 shards/clients", 3, clients.size());

     final SolrClient shard0 = clients.get(0);
     final SolrClient shard1 = clients.get(1);
     final SolrClient shard2 = clients.get(2);
     int nextId = 0;

     // top foo_s terms: aaa0..aaa4, 100 docs apiece on each of the three shards
     for (int round = 0; round < 100; round++) {
       for (int term = 0; term < 5; term++) {
         final String fooVal = "aaa" + term;
         shard0.add(sdoc("id", nextId++, "foo_s", fooVal, statField, term * 13 - round));
         shard1.add(sdoc("id", nextId++, "foo_s", fooVal, statField, term * 3 + round));
         shard2.add(sdoc("id", nextId++, "foo_s", fooVal, statField, round * 7 + term));
       }
     }

     // "second place" foo_s terms: bbb0..bbb19, 50 docs apiece, but only on shard0 & shard1
     for (int round = 0; round < 50; round++) {
       for (int term = 0; term < 20; term++) {
         final String fooVal = "bbb" + term;
         shard0.add(sdoc("id", nextId++, "foo_s", fooVal, statField, 0));
         shard1.add(sdoc("id", nextId++, "foo_s", fooVal, statField, 1));
       }
       // a distracting term that exists only on shard2 (50 docs in total, no stat value)
       shard2.add(sdoc("id", nextId++, "foo_s", "junkA"));
     }
     // exactly one "bbb0" doc on shard2, so refinement of bbb0 can be sanity checked
     shard2.add(sdoc("id", nextId++, "foo_s", "bbb0", statField, -2));

     // the long 'tail' foo_s term: 45 docs on every shard, and the only term whose docs
     // carry bar_s values
     for (int n = 0; n < 45; n++) {

       // shard0 & shard1: 7 docs for each of ccc0..ccc4, then 6 docs of "tailB",
       // then 4 docs of junk
       final String sharedBar;
       if (n < 35) {
         sharedBar = "ccc" + (n % 5);
       } else if (n < 41) {
         sharedBar = "tailB";
       } else {
         sharedBar = "junkA";
       }
       shard0.add(sdoc("id", nextId++, "foo_s", "tail", "bar_s", sharedBar, statField, n));
       shard1.add(sdoc("id", nextId++, "foo_s", "tail", "bar_s", sharedBar, statField, n));

       // shard2: 8 docs for each of junkB0..junkB4 (terms only it has), then 5 docs of "tailB"
       // NOTE: none of the shard2 'tail' docs get a statField value
       final String shard2Bar = (n < 40) ? "junkB" + (n % 5) : "tailB";
       shard2.add(sdoc("id", nextId++, "foo_s", "tail", "bar_s", shard2Bar));
     }

     // really long tail: 30 foo_s terms with a single doc each, only on shard2
     for (int n = 0; n < 30; n++) {
       // NOTE: "Z" is used so these sort before bbb0 when they tie with it at 1 doc each on shard2
       shard2.add(sdoc("id", nextId++, "foo_s", "ZZZ" + n));
     }
   }

   /**
    * Queries every shard on its own ({@code distrib=false}) and verifies that the top 10 {@code
    * foo_s} buckets, and on shard2 the {@code bar_s} sub-buckets of "tail", look the way the rest
    * of this test assumes.
    */
   @SuppressWarnings({"unchecked", "rawtypes"})
   private void sanityCheckIndividualShards() throws Exception {
     final SolrParams shardLocalReq =
         params(
             "q",
             "*:*",
             "distrib",
             "false",
             "json.facet",
             " { foo:{ type:terms, limit:10, field:foo_s, facet:{ bar:{ type:terms, limit:10, field:bar_s }}}}");

     // collect the 'foo' buckets reported by each shard
     final List<NamedList>[] perShardFoo = new List[clients.size()];
     for (int shard = 0; shard < perShardFoo.length; shard++) {
       final NamedList<NamedList> facets =
           (NamedList<NamedList>) clients.get(shard).query(shardLocalReq).getResponse().get("facets");
       perShardFoo[shard] = (List<NamedList>) facets.get("foo").get("buckets");
     }

     // positions 0-4: aaa0..aaa4 with 100 docs each, identical on all three shards
     for (int shard = 0; shard < 3; shard++) {
       final List<NamedList> fooBuckets = perShardFoo[shard];
       assertEquals(10, fooBuckets.size());
       for (int pos = 0; pos < 5; pos++) {
         final NamedList current = fooBuckets.get(pos);
         assertEquals(current.toString(), "aaa" + pos, current.get("val"));
         assertEquals(current.toString(), 100L, current.get("count"));
       }
     }

     // positions 5-9 on shard0 & shard1: some "bbb" term with 50 docs
     for (int shard = 0; shard < 2; shard++) {
       final List<NamedList> fooBuckets = perShardFoo[shard];
       for (int pos = 5; pos < 10; pos++) {
         final NamedList current = fooBuckets.get(pos);
         assertTrue(current.toString(), current.get("val").toString().startsWith("bbb"));
         assertEquals(current.toString(), 50L, current.get("count"));
       }
     }

     // positions 5-9 on shard2: junkA, then tail, then three single-doc "ZZZ" terms
     final List<NamedList> shard2Foo = perShardFoo[2];
     assertEquals("junkA", shard2Foo.get(5).get("val"));
     assertEquals(50L, shard2Foo.get(5).get("count"));
     assertEquals("tail", shard2Foo.get(6).get("val"));
     assertEquals(45L, shard2Foo.get(6).get("count"));
     for (int pos = 7; pos < 10; pos++) {
       final NamedList current = shard2Foo.get(pos);
       assertTrue(current.toString(), current.get("val").toString().startsWith("ZZZ"));
       assertEquals(current.toString(), 1L, current.get("count"));
     }

     // 'bar' sub-buckets of "tail" on shard2: five junkB terms (8 docs each), then tailB (5 docs)
     final NamedList shard2TailBar = (NamedList) shard2Foo.get(6).get("bar");
     final List<NamedList> barBuckets = (List<NamedList>) shard2TailBar.get("buckets");
     assertEquals(6, barBuckets.size());
     for (int pos = 0; pos < 5; pos++) {
       final NamedList current = barBuckets.get(pos);
       assertTrue(current.toString(), current.get("val").toString().startsWith("junkB"));
       assertEquals(current.toString(), 8L, current.get("count"));
     }
     final NamedList lastBar = barBuckets.get(5);
     assertEquals("tailB", lastBar.get("val"));
     assertEquals(5L, lastBar.get("count"));
   }

   /**
    * Runs distributed {@code json.facet} requests with different combinations of {@code refine},
    * {@code overrequest} and {@code limit}, and asserts which buckets (and which counts) come back
    * for the top-level 'foo' facet and its 'bar' sub-facet.
    *
    * <p>The request/response plumbing and the assertions repeated by every scenario live in the
    * private helpers that follow this method.
    */
   @SuppressWarnings({"rawtypes"})
   private void checkRefinementAndOverrequesting() throws Exception {
     // // distributed queries // //

     { // w/o refinement, the default overrequest isn't enough to find the long 'tail' *OR* the
       // correct count for 'bbb0'...
       final List<NamedList> fooBuckets =
           queryFooBuckets("{ foo: { type:terms, refine:none, limit:6, field:foo_s } }");
       assertTopFiveAaaBuckets(fooBuckets);

       // 'tail' is missed, and the count is short the "+1" for the doc added to shard2...
       final NamedList bucket = fooBuckets.get(5);
       assertEquals(bucket.toString(), "bbb0", bucket.get("val"));
       assertEquals(bucket.toString(), 100L, bucket.get("count"));
     }

     // even if we enable refinement, we still won't find the long 'tail' ...
     // regardless of whether we use either the default overrequest, or disable overrequesting...
     for (String over : Arrays.asList("", "overrequest:0,")) {
       final List<NamedList> fooBuckets =
           queryFooBuckets(
               "{ foo: { type:terms, refine:simple, limit:6, "
                   + over
                   + " field:foo_s, facet:{ "
                   + ALL_STATS_JSON
                   + "  bar: { type:terms, refine:simple, limit:6, "
                   + over
                   + " field:bar_s, facet:{"
                   + ALL_STATS_JSON
                   + "}}}}}");
       assertTopFiveAaaBuckets(fooBuckets);

       // ...but it should have correctly asked shard2 to refine bbb0
       final NamedList bucket = fooBuckets.get(5);
       assertEquals(bucket.toString(), "bbb0", bucket.get("val"));
       assertEquals(bucket.toString(), 101L, bucket.get("count"));
       // ...and the stats under bbb0 should be correct to include the refinement
       assertEquals(ALL_STATS.size() + 3, bucket.size()); // val,count,facet
       assertEquals(-2L, bucket.get("min")); // this min only exists on shard2
       assertEquals(1L, bucket.get("max"));
       assertEquals(101L, bucket.get("countvals"));
       assertEquals(0L, bucket.get("missing"));
       assertEquals(48.0D, bucket.get("sum"));
       assertEquals(1.0D, bucket.get("percentile"));
       assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
       assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7);
       assertEquals(0.55846323792D, (double) bucket.get("stddev"), 0.1E-7);
       assertEquals(0.3118811881D, (double) bucket.get("variance"), 0.1E-7);
       assertEquals(3L, bucket.get("unique"));
       assertEquals(3L, bucket.get("hll"));
     }

     // with a limit==6, we have to "overrequest >= 20" in order to ensure that 'tail' is included in
     // the top 6. This is because of how the "simple" refinement process works: the "top buckets"
     // are determined based on the info available in the first pass request.
     //
     // Even though 'tail' is returned in the top6 for shard2, the cumulative total for 'bbb0' from
     // shard0 and shard1 is high enough that the simple facet refinement ignores 'tail' because it
     // assumes 'bbb0's final total will be greater.
     //
     // Meanwhile, for the sub-facet on 'bar', a limit==6 means we should correctly find 'tailB' as
     // the top sub-term of 'tail', regardless of how much overrequest is used (or even if we don't
     // have any refinement) since it's always in the top6...
     for (String barOpts :
         Arrays.asList(
             "refine:none,",
             "refine:simple,",
             "refine:none,   overrequest:0,",
             "refine:simple, overrequest:0,")) {

       final List<NamedList> fooBuckets = queryFooBucketsWithBarSubFacet(6, barOpts);
       assertTopFiveAaaBuckets(fooBuckets);

       final List<NamedList> barBuckets = assertTailBucketAndGetBarBuckets(fooBuckets);
       assertEquals(6, barBuckets.size());
       assertTailBIsFirstBarBucket(barBuckets);
       assertCccBarBuckets(barBuckets, 1, 6); // ccc(0-4)
     }

     // if we lower the limit on the sub-bucket to '5', overrequesting of at least 1 should still
     // ensure that we get the correct top5 including "tailB" -- even w/o refinement
     for (String barOpts :
         Arrays.asList(
             "refine:none,",
             "refine:simple,",
             "refine:none,   overrequest:1,",
             "refine:simple, overrequest:1,")) {

       final List<NamedList> fooBuckets = queryFooBucketsWithBarSubFacet(5, barOpts);
       assertTopFiveAaaBuckets(fooBuckets);

       final List<NamedList> barBuckets = assertTailBucketAndGetBarBuckets(fooBuckets);
       assertEquals(5, barBuckets.size());
       assertTailBIsFirstBarBucket(barBuckets);
       assertCccBarBuckets(barBuckets, 1, 5); // ccc(0-3)
     }

     // however: with a lower sub-facet limit==5, and overrequesting disabled,
     // we're going to miss out on tailB even if we have refinement
     for (String barOpts :
         Arrays.asList("refine:none,   overrequest:0,", "refine:simple, overrequest:0,")) {

       final List<NamedList> fooBuckets = queryFooBucketsWithBarSubFacet(5, barOpts);
       assertTopFiveAaaBuckets(fooBuckets);

       final List<NamedList> barBuckets = assertTailBucketAndGetBarBuckets(fooBuckets);
       assertEquals(5, barBuckets.size());
       assertCccBarBuckets(barBuckets, 0, 5); // ccc(0-4)
     }
   }

   /**
    * Sends a {@code *:*} query with an explicit {@code shards} param and the given {@code
    * json.facet}, and returns the buckets of the top-level facet named "foo".
    *
    * @param jsonFacet complete value of the {@code json.facet} request parameter
    * @return the "buckets" list found under facets/foo in the response
    */
   @SuppressWarnings({"unchecked", "rawtypes"})
   private List<NamedList> queryFooBuckets(final String jsonFacet) throws Exception {
     final NamedList<NamedList> facets =
         (NamedList<NamedList>)
             queryServer(params("q", "*:*", "shards", getShardsString(), "json.facet", jsonFacet))
                 .getResponse()
                 .get("facets");
     return (List<NamedList>) facets.get("foo").get("buckets");
   }

   /**
    * Queries 'foo' with limit 6, overrequest 20 and simple refinement (enough for 'tail' to make
    * the top 6), with a 'bar' terms sub-facet configured by the arguments.
    *
    * @param barLimit the {@code limit} of the 'bar' sub-facet
    * @param barOpts extra comma-terminated options for the 'bar' sub-facet (refine / overrequest)
    * @return the 'foo' buckets of the response
    */
   @SuppressWarnings({"rawtypes"})
   private List<NamedList> queryFooBucketsWithBarSubFacet(final int barLimit, final String barOpts)
       throws Exception {
     return queryFooBuckets(
         "{ foo: { type:terms, limit:6, overrequest:20, refine:simple, field:foo_s, facet:{ "
             + "  bar: { type:terms, limit:"
             + barLimit
             + ", "
             + barOpts
             + " field:bar_s }}}}");
   }

   /** Asserts there are 6 'foo' buckets and that the first 5 are "aaa" terms with 300 docs each. */
   @SuppressWarnings({"rawtypes"})
   private static void assertTopFiveAaaBuckets(final List<NamedList> fooBuckets) {
     assertEquals(6, fooBuckets.size());
     for (int i = 0; i < 5; i++) {
       final NamedList bucket = fooBuckets.get(i);
       assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("aaa"));
       assertEquals(bucket.toString(), 300L, bucket.get("count"));
     }
   }

   /**
    * Asserts that the 6th 'foo' bucket is 'tail' with 135 docs, and returns the buckets of its
    * 'bar' sub-facet.
    */
   @SuppressWarnings({"unchecked", "rawtypes"})
   private static List<NamedList> assertTailBucketAndGetBarBuckets(
       final List<NamedList> fooBuckets) {
     final NamedList tail = fooBuckets.get(5);
     assertEquals(tail.toString(), "tail", tail.get("val"));
     assertEquals(tail.toString(), 135L, tail.get("count"));
     return ((NamedList<NamedList<List<NamedList>>>) tail).get("bar").get("buckets");
   }

   /** Asserts that the first 'bar' bucket is 'tailB' with 17 docs. */
   @SuppressWarnings({"rawtypes"})
   private static void assertTailBIsFirstBarBucket(final List<NamedList> barBuckets) {
     final NamedList bucket = barBuckets.get(0);
     assertEquals(bucket.toString(), "tailB", bucket.get("val"));
     assertEquals(bucket.toString(), 17L, bucket.get("count"));
   }

   /**
    * Asserts that every 'bar' bucket at positions {@code from} (inclusive) to {@code to}
    * (exclusive) is a "ccc" term with 14 docs.
    */
   @SuppressWarnings({"rawtypes"})
   private static void assertCccBarBuckets(
       final List<NamedList> barBuckets, final int from, final int to) {
     for (int i = from; i < to; i++) {
       final NamedList bucket = barBuckets.get(i);
       assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("ccc"));
       assertEquals(bucket.toString(), 14L, bucket.get("count"));
     }
   }

   /**
    * Runs the deep stat checks once for each facet option string below.
    *
    * <p>The assertions only look at the first few buckets of each facet, but finding the long tail
    * needs more buckets from the shards. The results are expected to be the same for "high limit"
    * and for "low limit + high overrequest", with refinement either enabled or explicitly
    * disabled.
    */
   private void checkSubFacetStats() throws Exception {
     final List<String> optionVariants =
         Arrays.asList(
             "refine:simple, limit: 100,",
             "refine:simple, overrequest: 100,",
             "refine:none, limit: 100,",
             "refine:none, overrequest: 100,");
     for (String extraJson : optionVariants) {
       checkSubFacetStats(extraJson);
     }
   }

   /**
    * Requests every stat in {@link #ALL_STATS} under both 'foo' and its 'bar' sub-facet (plus a
    * {@code relatedness} "skg" stat under 'bar') and checks the values reported for the 'aaa0',
    * 'tail' and 'tail/tailB' buckets.
    *
    * @param extraJson comma-terminated terms facet options applied to both 'foo' and 'bar'
    */
   private void checkSubFacetStats(String extraJson) throws Exception {
     final String termsOpts = "type: terms, " + extraJson;
     // under bar, in addition to "ALL" simple stats, we also ask for skg...
     final String jsonFacet =
         "{ foo : { "
             + termsOpts
             + " field: foo_s, facet: { "
             + ALL_STATS_JSON
             + " bar: { "
             + termsOpts
             + " field: bar_s, facet: { "
             + ALL_STATS_JSON
             + ", skg : 'relatedness($skg_fore,$skg_back)' } } } } }";
     final String skgFore = STAT_FIELD + ":[0 TO 40]";
     final String skgBack = STAT_FIELD + ":[-10000 TO 10000]";

     @SuppressWarnings({"unchecked", "rawtypes"})
     final NamedList<NamedList> facets =
         (NamedList)
             queryServer(
                     params(
                         "q",
                         "*:*",
                         "shards",
                         getShardsString(),
                         "rows",
                         "0",
                         "json.facet",
                         jsonFacet,
                         "skg_fore",
                         skgFore,
                         "skg_back",
                         skgBack))
                 .getResponse()
                 .get("facets");

     assertNotNull(facets);

     @SuppressWarnings({"unchecked", "rawtypes"})
     final List<NamedList> fooBuckets = (List) facets.get("foo").get("buckets");

     // first 'foo' bucket: aaa0
     @SuppressWarnings({"rawtypes"})
     final NamedList aaa0 = fooBuckets.get(0);
     assertEquals(ALL_STATS.size() + 3, aaa0.size()); // val,count,facet
     assertEquals("aaa0", aaa0.get("val"));
     assertEquals(300L, aaa0.get("count"));
     assertEquals(-99L, aaa0.get("min"));
     assertEquals(693L, aaa0.get("max"));
     assertEquals(300L, aaa0.get("countvals"));
     assertEquals(0L, aaa0.get("missing"));
     assertEquals(34650.0D, aaa0.get("sum"));
     assertEquals(486.5D, (double) aaa0.get("percentile"), 0.1E-7);
     assertEquals(115.5D, (double) aaa0.get("avg"), 0.1E-7);
     assertEquals(1.674585E7D, (double) aaa0.get("sumsq"), 0.1E-7);
     assertEquals(206.4493184076D, (double) aaa0.get("stddev"), 0.1E-7);
     assertEquals(42621.32107023412D, (double) aaa0.get("variance"), 0.1E-7);
     assertEquals(284L, aaa0.get("unique"));
     assertEquals(284L, aaa0.get("hll"));

     // sixth 'foo' bucket: tail
     @SuppressWarnings({"rawtypes"})
     final NamedList tail = fooBuckets.get(5);
     assertEquals(ALL_STATS.size() + 3, tail.size()); // val,count,facet
     assertEquals("tail", tail.get("val"));
     assertEquals(135L, tail.get("count"));
     assertEquals(0L, tail.get("min"));
     assertEquals(44L, tail.get("max"));
     assertEquals(90L, tail.get("countvals"));
     assertEquals(40.0D, tail.get("percentile"));
     assertEquals(45L, tail.get("missing"));
     assertEquals(1980.0D, tail.get("sum"));
     assertEquals(22.0D, (double) tail.get("avg"), 0.1E-7);
     assertEquals(58740.0D, (double) tail.get("sumsq"), 0.1E-7);
     assertEquals(13.0599310011D, (double) tail.get("stddev"), 0.1E-7);
     assertEquals(170.5617977535D, (double) tail.get("variance"), 0.1E-7);
     assertEquals(45L, tail.get("unique"));
     assertEquals(45L, tail.get("hll"));

     @SuppressWarnings({"unchecked", "rawtypes"})
     final List<NamedList> tailBarBuckets = (List) ((NamedList) tail.get("bar")).get("buckets");

     // first 'bar' bucket under tail: tailB
     @SuppressWarnings({"rawtypes"})
     final NamedList tailB = tailBarBuckets.get(0);
     assertEquals(ALL_STATS.size() + 3, tailB.size()); // val,count,skg ... NO SUB FACETS
     assertEquals("tailB", tailB.get("val"));
     assertEquals(17L, tailB.get("count"));
     assertEquals(35L, tailB.get("min"));
     assertEquals(40L, tailB.get("max"));
     assertEquals(12L, tailB.get("countvals"));
     assertEquals(40.0D, tailB.get("percentile"));
     assertEquals(5L, tailB.get("missing"));
     assertEquals(450.0D, tailB.get("sum"));
     assertEquals(37.5D, (double) tailB.get("avg"), 0.1E-7);
     assertEquals(16910.0D, (double) tailB.get("sumsq"), 0.1E-7);
     assertEquals(1.78376517D, (double) tailB.get("stddev"), 0.1E-7);
     assertEquals(3.1818181817D, (double) tailB.get("variance"), 0.1E-7);
     assertEquals(6L, tailB.get("unique"));
     assertEquals(6L, tailB.get("hll"));

     // the SKG stats on the tailB bucket: exactly three entries are expected
     @SuppressWarnings({"rawtypes"})
     final NamedList tailBSkg = (NamedList) tailB.get("skg");
     assertEquals(tailBSkg.toString(), 3, tailBSkg.size());
     assertEquals(0.19990D, tailBSkg.get("relatedness"));
     assertEquals(0.00334D, tailBSkg.get("foreground_popularity"));
     assertEquals(0.00334D, tailBSkg.get("background_popularity"));
     // not asserted (kept for reference):
     //   foreground_count == 12L, foreground_size == 82L,
     //   background_count == 12L, background_size == 3591L
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.search.facet;

	import java.util.Arrays;
	import java.util.List;
	import org.apache.solr.BaseDistributedSearchTestCase;
	import org.apache.solr.client.solrj.SolrClient;
	import org.apache.solr.common.params.SolrParams;
	import org.apache.solr.common.util.NamedList;
	import org.junit.Test;

	/**
	* A test that demonstrates some expected behavior for "long tail" terms when using <code>
	* refine:simple</code>
	*
	* <p><b>NOTE:</b> This test ignores the control collection (in single node mode, there is no need
	* for the overrequesting, all the data is local -- so comparisons with it wouldn't be valid in the
	* cases we are testing here)
	*
	* <p><b>NOTE:</b> This test is heavily inspired by (and uses the same indexed documents as) {@link
	* org.apache.solr.handler.component.DistributedFacetPivotLongTailTest} -- however the behavior of
	* <code>refine:simple</code> is "simpler" than the refinement logic used by <code>facet.pivot
	* </code> so the assertions in this test vary from that test.
	*/
	public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {

	/**
	 * Names of the aggregation functions requested in every stats-bearing facet of this test. The
	 * constructor turns each name into one {@code name:'name(field)',} entry of {@link
	 * #ALL_STATS_JSON}. Declared {@code final} because the list is a constant and is never
	 * reassigned.
	 */
	private static final List<String> ALL_STATS =
			Arrays.asList(
					"min",
					"max",
					"sum",
					"stddev",
					"avg",
					"sumsq",
					"unique",
					"missing",
					"countvals",
					"percentile",
					"variance",
					"hll");

	/** Field the stats are computed over; the constructor picks "stat_is" or "stat_i" at random. */
	private final String STAT_FIELD;

	/**
	 * Comma-terminated JSON facet entries, one per element of {@link #ALL_STATS}. Starts out empty
	 * and is filled in by the constructor.
	 */
	private String ALL_STATS_JSON = "";

	public DistributedFacetSimpleRefinementLongTailTest() {
	// we need DVs on point fields to compute stats & facets
	if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP))
	System.setProperty(NUMERIC_DOCVALUES_SYSPROP, "true");

	STAT_FIELD = random().nextBoolean() ? "stat_is" : "stat_i";

	for (String stat : ALL_STATS) {
	String val = stat.equals("percentile") ? STAT_FIELD + ",90" : STAT_FIELD;
	ALL_STATS_JSON += stat + ":'" + stat + "(" + val + ")',";
	}
	}

	/**
	* Single entry point of the test: indexes the fixture documents, commits, and then runs the
	* three groups of checks in order (per-shard sanity checks, refinement/overrequest checks, and
	* sub-facet stat checks).
	*/
	@Test
	@ShardsFixed(num = 3)
	public void test() throws Exception {
	// buildIndexes asserts that it is handed exactly 3 clients
	buildIndexes(clients, STAT_FIELD);
	commit();

	// the later checks rely on the per-shard bucket layout verified by the first one
	sanityCheckIndividualShards();
	checkRefinementAndOverrequesting();
	checkSubFacetStats();
	}

	/**
	* Populates the three shards with the "long tail" document set that the assertions of this test
	* are written against.
	*
	* <p>Ids come from a single counter ({@code docNum}) that is incremented once per added
	* document, in the order the documents are added.
	*
	* @param clients one client per shard; must contain exactly 3 entries
	* @param statField name of the field that receives the per-document stat value
	* @throws Exception if adding a document to a shard fails
	*/
	public static void buildIndexes(final List<SolrClient> clients, final String statField)
	throws Exception {

	assertEquals("This indexing code assumes exactly 3 shards/clients", 3, clients.size());

	int docNum = 0;
	final SolrClient shard0 = clients.get(0);
	final SolrClient shard1 = clients.get(1);
	final SolrClient shard2 = clients.get(2);

	// the 5 top foo_s terms (aaa0..aaa4) have 100 docs each on every shard;
	// each shard uses a different formula for the stat value
	for (int i = 0; i < 100; i++) {
	for (int j = 0; j < 5; j++) {
	shard0.add(sdoc("id", docNum++, "foo_s", "aaa" + j, statField, j * 13 - i));
	shard1.add(sdoc("id", docNum++, "foo_s", "aaa" + j, statField, j * 3 + i));
	shard2.add(sdoc("id", docNum++, "foo_s", "aaa" + j, statField, i * 7 + j));
	}
	}

	// 20 foo_s terms that come in "second" with 50 docs each
	// on both shard0 & shard1 ("bbb_"); stat value is 0 on shard0 and 1 on shard1
	for (int i = 0; i < 50; i++) {
	for (int j = 0; j < 20; j++) {
	shard0.add(sdoc("id", docNum++, "foo_s", "bbb" + j, statField, 0));
	shard1.add(sdoc("id", docNum++, "foo_s", "bbb" + j, statField, 1));
	}
	// distracting term appears only on shard2, 50 times (no stat value)
	shard2.add(sdoc("id", docNum++, "foo_s", "junkA"));
	}
	// put "bbb0" on shard2 exactly once to sanity check refinement
	shard2.add(sdoc("id", docNum++, "foo_s", "bbb0", statField, -2));

	// long 'tail' foo_s term appears in 45 docs on every shard
	// foo_s:tail is the only term with bar_s sub-pivot terms
	for (int i = 0; i < 45; i++) {

	// for sub-pivot, shard0 & shard1 have 6 docs each for "tailB"
	// but the top 5 terms are ccc(0-4) -- 7 on each shard
	// (4 docs each have junk terms)
	String sub_term = (i < 35) ? "ccc" + (i % 5) : ((i < 41) ? "tailB" : "junkA");
	shard0.add(sdoc("id", docNum++, "foo_s", "tail", "bar_s", sub_term, statField, i));
	shard1.add(sdoc("id", docNum++, "foo_s", "tail", "bar_s", sub_term, statField, i));

	// shard2's top 5 sub-pivot terms are junk that only it has, with 8 docs each,
	// plus 5 docs that use "tailB"
	// NOTE: none of these get statField ! !
	sub_term = (i < 40) ? "junkB" + (i % 5) : "tailB";
	shard2.add(sdoc("id", docNum++, "foo_s", "tail", "bar_s", sub_term));
	}

	// really long tail uncommon foo_s terms on shard2 (30 terms, one doc each)
	for (int i = 0; i < 30; i++) {
	// NOTE: using "Z" here so these sort before bbb0 when they tie for '1' instance each on
	// shard2
	shard2.add(sdoc("id", docNum++, "foo_s", "ZZZ" + i));
	}
	}

	/**
	 * Queries every shard on its own ({@code distrib=false}) and verifies that the top 10 {@code
	 * foo_s} buckets, and on shard2 the {@code bar_s} sub-buckets of "tail", look the way the rest
	 * of this test assumes.
	 */
	@SuppressWarnings({"unchecked", "rawtypes"})
	private void sanityCheckIndividualShards() throws Exception {
		// sanity check that our expectations about each shard (non-distrib) are correct

		// NOTE: the query must be the match-all "*:*" so that every indexed document is faceted
		SolrParams req =
				params(
						"q",
						"*:*",
						"distrib",
						"false",
						"json.facet",
						" { foo:{ type:terms, limit:10, field:foo_s, facet:{ bar:{ type:terms, limit:10, field:bar_s }}}}");

		List<NamedList>[] shardFooBuckets = new List[clients.size()];
		for (int i = 0; i < clients.size(); i++) {
			shardFooBuckets[i] =
					(List<NamedList>)
							((NamedList<NamedList>) clients.get(i).query(req).getResponse().get("facets"))
									.get("foo")
									.get("buckets");
		}

		// top 5 same on all shards: aaa0..aaa4 with 100 docs each
		for (int i = 0; i < 3; i++) {
			assertEquals(10, shardFooBuckets[i].size());
			for (int j = 0; j < 5; j++) {
				NamedList bucket = shardFooBuckets[i].get(j);
				assertEquals(bucket.toString(), "aaa" + j, bucket.get("val"));
				assertEquals(bucket.toString(), 100L, bucket.get("count"));
			}
		}
		// top 6-10 same on shard0 & shard1: some "bbb" term with 50 docs
		for (int i = 0; i < 2; i++) {
			for (int j = 5; j < 10; j++) {
				NamedList bucket = shardFooBuckets[i].get(j);
				assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("bbb"));
				assertEquals(bucket.toString(), 50L, bucket.get("count"));
			}
		}

		// 6-10 on shard2: junkA, then tail, then three single-doc "ZZZ" terms
		assertEquals("junkA", shardFooBuckets[2].get(5).get("val"));
		assertEquals(50L, shardFooBuckets[2].get(5).get("count"));
		assertEquals("tail", shardFooBuckets[2].get(6).get("val"));
		assertEquals(45L, shardFooBuckets[2].get(6).get("count"));
		for (int j = 7; j < 10; j++) {
			NamedList bucket = shardFooBuckets[2].get(j);
			assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("ZZZ"));
			assertEquals(bucket.toString(), 1L, bucket.get("count"));
		}

		// check 'bar' sub buckets on "tail" from shard2: five junkB terms (8 docs each), then tailB
		{
			List<NamedList> bar_buckets =
					(List<NamedList>)
							((NamedList<NamedList>) shardFooBuckets[2].get(6).get("bar")).get("buckets");
			assertEquals(6, bar_buckets.size());
			for (int j = 0; j < 5; j++) {
				NamedList bucket = bar_buckets.get(j);
				assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("junkB"));
				assertEquals(bucket.toString(), 8L, bucket.get("count"));
			}
			NamedList bucket = bar_buckets.get(5);
			assertEquals("tailB", bucket.get("val"));
			assertEquals(5L, bucket.get("count"));
		}
	}

	/**
	 * Runs a series of distributed terms facet requests on <code>foo_s</code> (with and without a
	 * <code>bar_s</code> sub-facet) using different combinations of <code>refine</code>, <code>
	 * limit</code> and <code>overrequest</code>, asserting which buckets are (and are not) found
	 * and what their counts/stats are.
	 *
	 * @throws Exception if any request fails
	 */
	@SuppressWarnings({"rawtypes"})
	private void checkRefinementAndOverrequesting() throws Exception {
	  // // distributed queries // //

	  { // w/o refinement, the default overrequest isn't enough to find the long 'tail' OR the
	    // correct count for 'bbb0'...
	    List<NamedList> fooBuckets =
	        distribFooBuckets("{ foo: { type:terms, refine:none, limit:6, field:foo_s } }");
	    assertSixFooBucketsLedByAaa(fooBuckets);

	    // 'tail' is missed, and the count will be short the "+1" for the doc added to shard2...
	    NamedList bucket = fooBuckets.get(5);
	    assertEquals(bucket.toString(), "bbb0", bucket.get("val"));
	    assertEquals(bucket.toString(), 100L, bucket.get("count"));
	  }

	  // even if we enable refinement, we still won't find the long 'tail' ...
	  // regardless of whether we use either the default overrequest, or disable overrequesting...
	  for (String over : Arrays.asList("", "overrequest:0,")) {
	    List<NamedList> fooBuckets =
	        distribFooBuckets(
	            "{ foo: { type:terms, refine:simple, limit:6, "
	                + over
	                + " field:foo_s, facet:{ "
	                + ALL_STATS_JSON
	                + " bar: { type:terms, refine:simple, limit:6, "
	                + over
	                + " field:bar_s, facet:{"
	                + ALL_STATS_JSON
	                + "}}}}}");
	    assertSixFooBucketsLedByAaa(fooBuckets);

	    // ...but it should have correctly asked shard2 to refine bbb0
	    NamedList bucket = fooBuckets.get(5);
	    assertEquals(bucket.toString(), "bbb0", bucket.get("val"));
	    assertEquals(bucket.toString(), 101L, bucket.get("count"));
	    // ...and the stats under bbb0 should be correct to include the refinement
	    assertEquals(ALL_STATS.size() + 3, bucket.size()); // val,count,facet
	    assertEquals(-2L, bucket.get("min")); // this min only exists on shard2
	    assertEquals(1L, bucket.get("max"));
	    assertEquals(101L, bucket.get("countvals"));
	    assertEquals(0L, bucket.get("missing"));
	    assertEquals(48.0D, bucket.get("sum"));
	    assertEquals(1.0D, bucket.get("percentile"));
	    assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
	    assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7);
	    assertEquals(0.55846323792D, (double) bucket.get("stddev"), 0.1E-7);
	    assertEquals(0.3118811881D, (double) bucket.get("variance"), 0.1E-7);
	    assertEquals(3L, bucket.get("unique"));
	    assertEquals(3L, bucket.get("hll"));
	  }

	  // with a limit==6, we have to "overrequest >= 20" in order to ensure that 'tail' is included in
	  // the top 6 this is because of how the "simple" refinement process works: the "top buckets" are
	  // determined based on the info available in the first pass request.
	  //
	  // Even though 'tail' is returned in the top6 for shard2, the cumulative total for 'bbb0' from
	  // shard0 and shard1 is high enough that the simple facet refinement ignores 'tail' because it
	  // assumes 'bbb0's final total will be greater.
	  //
	  // Meanwhile, for the sub-facet on 'bar', a limit==6 means we should correctly find 'tailB' as
	  // the top sub-term of 'tail', regardless of how much overrequest is used (or even if we don't
	  // have any refinement) since it's always in the top6...
	  for (String barOpts :
	      Arrays.asList(
	          "refine:none,",
	          "refine:simple,",
	          "refine:none, overrequest:0,",
	          "refine:simple, overrequest:0,")) {
	    List<NamedList> barBuckets = barBucketsUnderTail(6, barOpts);
	    assertIsTailB(barBuckets.get(0));
	    assertAreCcc(barBuckets.subList(1, 6)); // ccc(0-4)
	  }

	  // if we lower the limit on the sub-bucket to '5', overrequesting of at least 1 should still
	  // ensure that we get the correct top5 including "tailB" -- even w/o refinement
	  for (String barOpts :
	      Arrays.asList(
	          "refine:none,",
	          "refine:simple,",
	          "refine:none, overrequest:1,",
	          "refine:simple, overrequest:1,")) {
	    List<NamedList> barBuckets = barBucketsUnderTail(5, barOpts);
	    assertIsTailB(barBuckets.get(0));
	    assertAreCcc(barBuckets.subList(1, 5)); // ccc(0-3)
	  }

	  // however: with a lower sub-facet limit==5, and overrequesting disabled,
	  // we're going to miss out on tailB even if we have refinement
	  for (String barOpts :
	      Arrays.asList("refine:none, overrequest:0,", "refine:simple, overrequest:0,")) {
	    List<NamedList> barBuckets = barBucketsUnderTail(5, barOpts);
	    assertAreCcc(barBuckets); // ccc(0-4)
	  }
	}

	/**
	 * Executes a distributed match-all (<code>*:*</code>) request across all shards using the
	 * specified <code>json.facet</code>, and returns the buckets of the facet named "foo".
	 */
	@SuppressWarnings({"unchecked", "rawtypes"})
	private List<NamedList> distribFooBuckets(String jsonFacet) throws Exception {
	  NamedList<NamedList> facets =
	      (NamedList<NamedList>)
	          queryServer(params("q", "*:*", "shards", getShardsString(), "json.facet", jsonFacet))
	              .getResponse()
	              .get("facets");
	  return (List<NamedList>) facets.get("foo").get("buckets");
	}

	/**
	 * Asserts that there are exactly 6 "foo" buckets, and that each of the first 5 has a value
	 * starting with "aaa" and a count of 300.
	 */
	@SuppressWarnings({"rawtypes"})
	private void assertSixFooBucketsLedByAaa(List<NamedList> fooBuckets) {
	  assertEquals(6, fooBuckets.size());
	  for (int i = 0; i < 5; i++) {
	    NamedList bucket = fooBuckets.get(i);
	    assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("aaa"));
	    assertEquals(bucket.toString(), 300L, bucket.get("count"));
	  }
	}

	/**
	 * Requests the top 6 "foo" buckets (refine:simple, overrequest:20) with a "bar" sub-facet using
	 * the specified limit and options, asserts that the 6th "foo" bucket is "tail" with a count of
	 * 135, and returns the "bar" sub-buckets of "tail" after asserting there are exactly <code>
	 * barLimit</code> of them.
	 */
	@SuppressWarnings({"unchecked", "rawtypes"})
	private List<NamedList> barBucketsUnderTail(int barLimit, String barOpts) throws Exception {
	  List<NamedList> fooBuckets =
	      distribFooBuckets(
	          "{ foo: { type:terms, limit:6, overrequest:20, refine:simple, field:foo_s, facet:{ "
	              + " bar: { type:terms, limit:"
	              + barLimit
	              + ", "
	              + barOpts
	              + " field:bar_s }}}}");
	  assertSixFooBucketsLedByAaa(fooBuckets);

	  NamedList tail = fooBuckets.get(5);
	  assertEquals(tail.toString(), "tail", tail.get("val"));
	  assertEquals(tail.toString(), 135L, tail.get("count"));

	  // check the sub buckets
	  List<NamedList> barBuckets =
	      ((NamedList<NamedList<List<NamedList>>>) tail).get("bar").get("buckets");
	  assertEquals(barLimit, barBuckets.size());
	  return barBuckets;
	}

	/** Asserts that the specified bucket is "tailB" with a count of 17. */
	@SuppressWarnings({"rawtypes"})
	private void assertIsTailB(NamedList bucket) {
	  assertEquals(bucket.toString(), "tailB", bucket.get("val"));
	  assertEquals(bucket.toString(), 17L, bucket.get("count"));
	}

	/** Asserts that every specified bucket has a value starting with "ccc" and a count of 14. */
	@SuppressWarnings({"rawtypes"})
	private void assertAreCcc(List<NamedList> buckets) {
	  for (NamedList bucket : buckets) {
	    assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("ccc"));
	    assertEquals(bucket.toString(), 14L, bucket.get("count"));
	  }
	}

	/**
	 * Deep checking of some Facet stats, repeated once for each of several facet option
	 * combinations.
	 *
	 * @throws Exception if any of the underlying checks fail
	 */
	private void checkSubFacetStats() throws Exception {
	  // the assertions only care about the first 5 results of each facet, but to get the long tail
	  // more are needed from the sub-shards. results should be the same regardless of: "high limit"
	  // vs "low limit + high overrequest" ... and the results shouldn't change if we explicitly
	  // disable refinement
	  for (String facetOpts :
	      Arrays.asList(
	          "refine:simple, limit: 100,",
	          "refine:simple, overrequest: 100,",
	          "refine:none, limit: 100,",
	          "refine:none, overrequest: 100,")) {
	    checkSubFacetStats(facetOpts);
	  }
	}

	/**
	 * Executes a distributed nested terms facet request (<code>foo_s</code> with a <code>bar_s
	 * </code> sub-facet) that asks for the stats in <code>ALL_STATS_JSON</code> at both levels,
	 * plus a <code>relatedness()</code> stat named "skg" under "bar", and then deep checks the
	 * values of the "aaa0" bucket, the "tail" bucket, and the "tailB" sub-bucket of "tail".
	 *
	 * @param extraJson facet options appended after <code>type: terms,</code> in both the "foo" and
	 *     "bar" facets; callers pass a fragment that ends with a comma
	 * @throws Exception if the request fails
	 */
	private void checkSubFacetStats(String extraJson) throws Exception {
	  String commonJson = "type: terms, " + extraJson;
	  // NOTE: the query must be the match-all "*:*" -- the expected counts below cover every doc
	  // in the index, and a bare ":" is not a parsable query
	  @SuppressWarnings({"unchecked", "rawtypes"})
	  NamedList<NamedList> all_facets =
	      (NamedList)
	          queryServer(
	                  params(
	                      "q",
	                      "*:*",
	                      "shards",
	                      getShardsString(),
	                      "rows",
	                      "0",
	                      "json.facet",
	                      "{ foo : { "
	                          + commonJson
	                          + " field: foo_s, facet: { "
	                          + ALL_STATS_JSON
	                          + " bar: { "
	                          + commonJson
	                          + " field: bar_s, facet: { "
	                          + ALL_STATS_JSON
	                          +
	                          // under bar, in addition to "ALL" simple stats, we also ask for skg...
	                          ", skg : 'relatedness($skg_fore,$skg_back)' } } } } }",
	                      "skg_fore",
	                      STAT_FIELD + ":[0 TO 40]",
	                      "skg_back",
	                      STAT_FIELD + ":[-10000 TO 10000]"))
	              .getResponse()
	              .get("facets");

	  assertNotNull(all_facets);

	  @SuppressWarnings({"unchecked", "rawtypes"})
	  List<NamedList> foo_buckets = (List) (all_facets.get("foo")).get("buckets");

	  // the first 'foo' bucket
	  @SuppressWarnings({"rawtypes"})
	  NamedList aaa0_Bucket = foo_buckets.get(0);
	  assertEquals(ALL_STATS.size() + 3, aaa0_Bucket.size()); // val,count,facet
	  assertEquals("aaa0", aaa0_Bucket.get("val"));
	  assertEquals(300L, aaa0_Bucket.get("count"));
	  assertEquals(-99L, aaa0_Bucket.get("min"));
	  assertEquals(693L, aaa0_Bucket.get("max"));
	  assertEquals(300L, aaa0_Bucket.get("countvals"));
	  assertEquals(0L, aaa0_Bucket.get("missing"));
	  assertEquals(34650.0D, aaa0_Bucket.get("sum"));
	  assertEquals(486.5D, (double) aaa0_Bucket.get("percentile"), 0.1E-7);
	  assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7);
	  assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7);
	  assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7);
	  assertEquals(42621.32107023412D, (double) aaa0_Bucket.get("variance"), 0.1E-7);
	  assertEquals(284L, aaa0_Bucket.get("unique"));
	  assertEquals(284L, aaa0_Bucket.get("hll"));

	  // the sixth 'foo' bucket: the long 'tail' term
	  @SuppressWarnings({"rawtypes"})
	  NamedList tail_Bucket = foo_buckets.get(5);
	  assertEquals(ALL_STATS.size() + 3, tail_Bucket.size()); // val,count,facet
	  assertEquals("tail", tail_Bucket.get("val"));
	  assertEquals(135L, tail_Bucket.get("count"));
	  assertEquals(0L, tail_Bucket.get("min"));
	  assertEquals(44L, tail_Bucket.get("max"));
	  assertEquals(90L, tail_Bucket.get("countvals"));
	  assertEquals(40.0D, tail_Bucket.get("percentile"));
	  assertEquals(45L, tail_Bucket.get("missing"));
	  assertEquals(1980.0D, tail_Bucket.get("sum"));
	  assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7);
	  assertEquals(58740.0D, (double) tail_Bucket.get("sumsq"), 0.1E-7);
	  assertEquals(13.0599310011D, (double) tail_Bucket.get("stddev"), 0.1E-7);
	  assertEquals(170.5617977535D, (double) tail_Bucket.get("variance"), 0.1E-7);
	  assertEquals(45L, tail_Bucket.get("unique"));
	  assertEquals(45L, tail_Bucket.get("hll"));

	  @SuppressWarnings({"unchecked", "rawtypes"})
	  List<NamedList> tail_bar_buckets = (List) ((NamedList) tail_Bucket.get("bar")).get("buckets");

	  // the first 'bar' sub-bucket under 'tail'
	  @SuppressWarnings({"rawtypes"})
	  NamedList tailB_Bucket = tail_bar_buckets.get(0);
	  assertEquals(ALL_STATS.size() + 3, tailB_Bucket.size()); // val,count,skg ... NO SUB FACETS
	  assertEquals("tailB", tailB_Bucket.get("val"));
	  assertEquals(17L, tailB_Bucket.get("count"));
	  assertEquals(35L, tailB_Bucket.get("min"));
	  assertEquals(40L, tailB_Bucket.get("max"));
	  assertEquals(12L, tailB_Bucket.get("countvals"));
	  assertEquals(40.0D, tailB_Bucket.get("percentile"));
	  assertEquals(5L, tailB_Bucket.get("missing"));
	  assertEquals(450.0D, tailB_Bucket.get("sum"));
	  assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7);
	  assertEquals(16910.0D, (double) tailB_Bucket.get("sumsq"), 0.1E-7);
	  assertEquals(1.78376517D, (double) tailB_Bucket.get("stddev"), 0.1E-7);
	  assertEquals(3.1818181817D, (double) tailB_Bucket.get("variance"), 0.1E-7);
	  assertEquals(6L, tailB_Bucket.get("unique"));
	  assertEquals(6L, tailB_Bucket.get("hll"));

	  // check the SKG stats on our tailB bucket
	  @SuppressWarnings({"rawtypes"})
	  NamedList tailB_skg = (NamedList) tailB_Bucket.get("skg");
	  assertEquals(tailB_skg.toString(), 3, tailB_skg.size());
	  assertEquals(0.19990D, tailB_skg.get("relatedness"));
	  assertEquals(0.00334D, tailB_skg.get("foreground_popularity"));
	  assertEquals(0.00334D, tailB_skg.get("background_popularity"));
	  // assertEquals(12L, tailB_skg.get("foreground_count"));
	  // assertEquals(82L, tailB_skg.get("foreground_size"));
	  // assertEquals(12L, tailB_skg.get("background_count"));
	  // assertEquals(3591L, tailB_skg.get("background_size"));
	}
	}