blob: 07609d3ad1b3be487dcdda2006f7103421ec8516 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.response;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;

import org.apache.lucene.index.FieldInfo;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.DatePointField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieDateField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;

import static junit.framework.Assert.fail;
import static org.apache.lucene.util.LuceneTestCase.random;
import static org.apache.solr.search.SolrReturnFields.FIELD_SOURCES.ALL_FROM_DV;
import static org.apache.solr.search.SolrReturnFields.FIELD_SOURCES.ALL_FROM_STORED;
import static org.apache.solr.search.SolrReturnFields.FIELD_SOURCES.MIXED_SOURCES;
/**
 * Verifies the retrieve-fields optimizer: field values must be fetched from the
 * expected source (all stored, all docValues, or a mix) and the returned values
 * must match what was indexed, across every stored/docValues x single/multiValued
 * combination of the test types.
 */
public class TestRetrieveFieldsOptimizer extends SolrTestCaseJ4 {

  @Rule
  public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule());

  @BeforeClass
  public static void initManagedSchemaCore() throws Exception {
    // This testing approach means no schema file or per-test temp solr-home!
    System.setProperty("managed.schema.mutable", "true");
    System.setProperty("managed.schema.resourceName", "schema-one-field-no-dynamic-field-unique-key.xml");
    System.setProperty("enable.update.log", "false");
    initCore("solrconfig-managed-schema.xml", "ignoredSchemaName");
    // NOTE(review): the pre-setup schema is captured here and re-installed after
    // setupAllFields() (which installs its own, expanded schema). Preserved as-is;
    // confirm this restore is intentional.
    IndexSchema schema = h.getCore().getLatestSchema();
    setupAllFields();
    h.getCore().setLatestSchema(schema);
  }

  // Field-name suffixes encoding the stored/docValues/multiValued combination:
  // s = stored, ndv/dv = (no) docValues, sv/mv = single/multiValued.
  static final String storedNotDvSv = "_s_ndv_sv";
  static final String storedAndDvSv = "_s_dv_sv";
  static final String notStoredDvSv = "_ns_dv_sv";
  static final String storedNotDvMv = "_s_ndv_mv";
  static final String storedAndDvMv = "_s_dv_mv";
  static final String notStoredDvMv = "_ns_dv_mv";

  // Each doc needs a field I can use to identify it for value comparison,
  // one fetchable from stored data and one from docValues.
  static final String idStoredNotDv = "id_s_ndv_sv";
  static final String idNotStoredDv = "id_ns_dv_sv";

  static final FieldTypeHolder typesHolder = new FieldTypeHolder();
  static final FieldHolder fieldsHolder = new FieldHolder();

  // Expected values per doc: doc id -> (field name -> expected values, in order).
  static final Map<String, Map<String, List<String>>> allFieldValuesInput = new HashMap<>();

  /**
   * Registers one field type plus six fields (every stored/docValues x
   * single/multiValued combination) for each test type, indexes a handful of
   * docs, then normalizes the expected values to account for docValues
   * set/sort semantics.
   */
  //TODO, how to generalize?
  @SuppressWarnings({"unchecked"})
  private static void setupAllFields() throws IOException {
    IndexSchema schema = h.getCore().getLatestSchema();

    // Add all the types before the fields.
    Map<String, Map<String, String>> fieldsToAdd = new HashMap<>();

    // We need our special id fields to find the docs later.
    typesHolder.addFieldType(schema, idNotStoredDv, RetrieveFieldType.TEST_TYPE.STRING);
    fieldsToAdd.put(idNotStoredDv, map("stored", "false", "docValues", "true", "multiValued", "false"));

    typesHolder.addFieldType(schema, idStoredNotDv, RetrieveFieldType.TEST_TYPE.STRING);
    fieldsToAdd.put(idStoredNotDv, map("stored", "true", "docValues", "false", "multiValued", "false"));

    for (RetrieveFieldType.TEST_TYPE type : RetrieveFieldType.solrClassMap.keySet()) {
      // We happen to be naming the fields and types identically.
      addTypeAndField(schema, fieldsToAdd, type, storedNotDvSv, "true", "false", "false");
      addTypeAndField(schema, fieldsToAdd, type, storedAndDvSv, "true", "true", "false");
      addTypeAndField(schema, fieldsToAdd, type, notStoredDvSv, "false", "true", "false");
      addTypeAndField(schema, fieldsToAdd, type, storedNotDvMv, "true", "false", "true");
      addTypeAndField(schema, fieldsToAdd, type, storedAndDvMv, "true", "true", "true");
      addTypeAndField(schema, fieldsToAdd, type, notStoredDvMv, "false", "true", "true");
    }

    schema = typesHolder.addFieldTypes(schema);

    for (Map.Entry<String, Map<String, String>> ent : fieldsToAdd.entrySet()) {
      fieldsHolder.addField(schema, ent.getKey(), ent.getKey(), ent.getValue());
    }
    schema = fieldsHolder.addFields(schema);
    h.getCore().setLatestSchema(schema);

    // All that setup work and we're only going to add a very few docs!
    for (int idx = 0; idx < 10; ++idx) {
      addDocWithAllFields(idx);
    }
    assertU(commit());

    // Now we need to massage the expected values returned based on the docValues type 'cause it's weird.
    final RefCounted<SolrIndexSearcher> refCounted = h.getCore().getNewestSearcher(true);
    try {
      for (Map<String, List<String>> docFieldsEnt : allFieldValuesInput.values()) {
        for (Map.Entry<String, List<String>> oneField : docFieldsEnt.entrySet()) {
          RetrieveField field = fieldsHolder.getTestField(oneField.getKey());
          field.expectedValsAsStrings(refCounted.get().getSlowAtomicReader().getFieldInfos().fieldInfo(field.name),
              oneField.getValue());
        }
      }
    } finally {
      refCounted.decref();
    }
  }

  // Registers a field type named type+suffix and queues a same-named field with the given options.
  @SuppressWarnings({"unchecked"})
  private static void addTypeAndField(IndexSchema schema, Map<String, Map<String, String>> fieldsToAdd,
                                      RetrieveFieldType.TEST_TYPE type, String suffix,
                                      String stored, String docValues, String multiValued) {
    String myName = type.toString() + suffix;
    typesHolder.addFieldType(schema, myName, type);
    fieldsToAdd.put(myName, map("stored", stored, "docValues", docValues, "multiValued", multiValued));
  }

  /**
   * Adds one doc carrying a value for every registered field and records the
   * values so {@link #check} can compare what comes back.
   */
  static void addDocWithAllFields(int idx) {
    // for each doc, add a doc with all the fields with values and store the expected return.
    Map<String, List<String>> fieldsExpectedVals = new HashMap<>();
    SolrInputDocument sdoc = new SolrInputDocument();

    String id = "str" + idx;
    sdoc.addField("str", id);
    sdoc.addField(idNotStoredDv, id);
    fieldsExpectedVals.put(idNotStoredDv, Collections.singletonList(id));
    sdoc.addField(idStoredNotDv, id);
    fieldsExpectedVals.put(idStoredNotDv, Collections.singletonList(id));

    for (RetrieveField field : fieldsHolder.fields.values()) {
      if (field.name.equals(idNotStoredDv) || field.name.equals(idStoredNotDv)) {
        continue; // id fields already added above
      }
      List<String> valsAsStrings = field.getValsForField();
      for (String val : valsAsStrings) {
        sdoc.addField(field.schemaField.getName(), val);
      }
      fieldsExpectedVals.put(field.name, valsAsStrings);
    }

    allFieldValuesInput.put(id, fieldsExpectedVals);
    assertU(adoc(sdoc));
  }

  /**
   * Hammers the doc fetcher from several threads at once; each thread exercises
   * all three fetch-source modes. Failures are collected and reported on the
   * test thread — an assertion thrown inside a child thread would otherwise not
   * fail the test and would lose its stack trace.
   */
  @Test
  public void testDocFetcher() throws Exception {
    int numThreads = random().nextInt(3) + 2;
    List<Throwable> failures = Collections.synchronizedList(new ArrayList<>());
    Thread[] threads = new Thread[numThreads];
    for (int idx = 0; idx < numThreads; idx++) {
      threads[idx] = new Thread() {
        @Override
        public void run() {
          try {
            checkFetchSources(ALL_FROM_DV);
            checkFetchSources(ALL_FROM_STORED);
            checkFetchSources(MIXED_SOURCES);
          } catch (Throwable e) {
            failures.add(e);
          }
        }
      };
      threads[idx].start();
    }
    for (int idx = 0; idx < numThreads; idx++) {
      threads[idx].join();
    }
    if (failures.isEmpty() == false) {
      fail("Failed with exception(s): " + failures);
    }
  }

  /**
   * Builds fl lists that should be satisfied entirely from the given source and
   * checks them, plus variants (all fields; a multiValued field mixed in) that
   * force other sources.
   */
  private void checkFetchSources(SolrReturnFields.FIELD_SOURCES source) throws Exception {
    String flAll = fieldsHolder.allFields.stream()
        .map(RetrieveField::getName) // This will call testField.getName()
        .collect(Collectors.joining(","));

    List<RetrieveField> toCheck = new ArrayList<>();
    String idField = idNotStoredDv + ",";
    switch (source) {
      case ALL_FROM_DV:
        toCheck = new ArrayList<>(fieldsHolder.dvNotStoredFields);
        break;
      case ALL_FROM_STORED:
        idField = idStoredNotDv + ",";
        toCheck = new ArrayList<>(fieldsHolder.storedNotDvFields);
        break;
      case MIXED_SOURCES:
        toCheck = new ArrayList<>(fieldsHolder.allFields);
        break;
      default:
        fail("Value passed to checkFetchSources unknown: " + source.toString());
    }

    // MultiValued fields are _always_ read from stored data.
    toCheck.removeAll(fieldsHolder.multiValuedFields);

    // At this point, toCheck should be only singleValued fields. Adding in even a single multiValued field should
    // read stuff from stored.
    String fl = idField + toCheck.stream()
        .map(RetrieveField::getName) // This will call testField.getName()
        .collect(Collectors.joining(","));

    // Even a single multiValued and stored field should cause stored fields to be visited.
    List<Integer> shuffled = Arrays.asList(0, 1, 2);
    Collections.shuffle(shuffled, random());
    for (int which : shuffled) {
      switch (which) {
        case 0:
          check(fl, source);
          break;

        case 1:
          check(flAll, MIXED_SOURCES);
          break;

        case 2:
          List<RetrieveField> toCheckPlusMv = new ArrayList<>(toCheck);
          toCheckPlusMv.add(fieldsHolder.storedMvFields.get(random().nextInt(fieldsHolder.storedMvFields.size())));

          String flWithMv = idField + toCheckPlusMv.stream()
              .map(RetrieveField::getName) // This will call testField.getName()
              .collect(Collectors.joining(","));
          if (source == ALL_FROM_STORED) {
            check(flWithMv, ALL_FROM_STORED);
          } else {
            check(flWithMv, MIXED_SOURCES);
          }
          break;
        default:
          fail("Your shuffling should be between 0 and 2, inclusive. It was: " + which);
      }
    }
  }

  // This checks a couple of things:
  // 1> we got all the values from the place we expected.
  // 2> all the values we expect are actually returned.
  //
  // NOTE: multiValued fields are _NOT_ fetched from docValues by design so we don't have to worry about set semantics
  //
  private void check(String flIn, SolrReturnFields.FIELD_SOURCES source) throws Exception {
    Set<String> setDedupe = new HashSet<>(Arrays.asList(flIn.split(",")));
    String fl = String.join(",", setDedupe);

    SolrCore core = h.getCore();

    SolrQueryRequest req = lrf.makeRequest("q", "*:*", CommonParams.FL, fl);
    try {
      SolrQueryResponse rsp = h.queryAndResponse("", req);
      BinaryQueryResponseWriter writer = (BinaryQueryResponseWriter) core.getQueryResponseWriter("javabin");
      ByteArrayOutputStream baos = new ByteArrayOutputStream();
      writer.write(baos, req, rsp);

      // This is really the main point!
      assertEquals("We didn't get the values from the expected places! ",
          source, ((SolrReturnFields) rsp.returnFields).getFieldSources());

      // Round-trip through javabin so we see what a client would see.
      @SuppressWarnings({"rawtypes"})
      NamedList res;
      try (JavaBinCodec jbc = new JavaBinCodec()) {
        res = (NamedList) jbc.unmarshal(new ByteArrayInputStream(baos.toByteArray()));
      }
      SolrDocumentList docs = (SolrDocumentList) res.get("response");
      for (Object doc : docs) {
        SolrDocument sdoc = (SolrDocument) doc;
        // Check that every (and only) the fields in the fl param were fetched and the values are as expected.
        // Since each doc has the same fields, we don't need to find the special doc.
        String[] requestedFields = fl.split(",");
        assertEquals("Should have exactly as many fields as requested, ", requestedFields.length, sdoc.getFieldNames().size());

        // The doc may have been fetched via either id field depending on the source.
        String id = (String) sdoc.get(idNotStoredDv);
        if (id == null) {
          id = (String) sdoc.get(idStoredNotDv);
        }
        Map<String, List<String>> expected = allFieldValuesInput.get(id);
        for (String field : requestedFields) {
          Object[] docVals = sdoc.getFieldValues(field).toArray();
          RetrieveField testField = fieldsHolder.getTestField(field);
          List<String> expectedVals = expected.get(field);
          assertEquals("Returned fields should have the expected number of entries", expectedVals.size(), docVals.length);
          for (int idx = 0; idx < docVals.length; ++idx) {
            assertEquals("Values should be identical and exactly in order. ", expectedVals.get(idx), testField.getValAsString(docVals[idx]));
          }
        }
      }
    } finally {
      req.close();
    }
  }
}
/**
 * Registry of the custom field types created for this test, keyed by type name.
 * Types are accumulated via {@link #addFieldType} and then installed into the
 * schema in one shot via {@link #addFieldTypes}.
 */
class FieldTypeHolder {
  Map<String, RetrieveFieldType> testTypes = new HashMap<>();

  /** Creates and records a test type for the given name and test-type enum. */
  void addFieldType(IndexSchema schema, String name, RetrieveFieldType.TEST_TYPE type) {
    testTypes.put(name, new RetrieveFieldType(schema, name, type));
  }

  /** Installs every recorded type into the schema; returns the resulting schema. */
  IndexSchema addFieldTypes(IndexSchema schema) {
    List<FieldType> collected = testTypes.values().stream()
        .map(RetrieveFieldType::getFieldType)
        .collect(Collectors.toList());
    return schema.addFieldTypes(collected, false);
  }

  /** Looks up a previously-registered test type by name, or null if absent. */
  RetrieveFieldType getTestType(String name) {
    return testTypes.get(name);
  }
}
/**
 * Associates a test-type enum value with the Solr {@link FieldType} created for
 * it and with the solr.* class name used to declare it in the schema.
 */
class RetrieveFieldType {
  final String name;
  final FieldType solrFieldType;
  final TEST_TYPE testType;
  final String solrTypeClass;

  /** The Trie*, Point*, String and Bool types this test exercises. */
  enum TEST_TYPE {
    TINT, TLONG, TFLOAT, TDOUBLE, TDATE,
    PINT, PLONG, PFLOAT, PDOUBLE, PDATE,
    STRING, BOOL
  }

  // Maps each test type to the Solr field-type class used when declaring it.
  // EnumMap is the idiomatic (and cheaper) container for enum keys.
  static final Map<TEST_TYPE, String> solrClassMap;

  static {
    Map<TEST_TYPE, String> classes = new EnumMap<>(TEST_TYPE.class);
    classes.put(TEST_TYPE.TINT, "solr.TrieIntField");
    classes.put(TEST_TYPE.TLONG, "solr.TrieLongField");
    classes.put(TEST_TYPE.TFLOAT, "solr.TrieFloatField");
    classes.put(TEST_TYPE.TDOUBLE, "solr.TrieDoubleField");
    classes.put(TEST_TYPE.TDATE, "solr.TrieDateField");
    classes.put(TEST_TYPE.PINT, "solr.IntPointField");
    classes.put(TEST_TYPE.PLONG, "solr.LongPointField");
    classes.put(TEST_TYPE.PFLOAT, "solr.FloatPointField");
    classes.put(TEST_TYPE.PDOUBLE, "solr.DoublePointField");
    classes.put(TEST_TYPE.PDATE, "solr.DatePointField");
    classes.put(TEST_TYPE.STRING, "solr.StrField");
    classes.put(TEST_TYPE.BOOL, "solr.BoolField");
    solrClassMap = Collections.unmodifiableMap(classes);
  }

  /** Creates the Solr FieldType for {@code name} using the class mapped from {@code type}. */
  RetrieveFieldType(IndexSchema schema, String name, TEST_TYPE type) {
    this.name = name;
    Map<String, String> opts = new HashMap<>();
    opts.put("name", name);
    this.solrTypeClass = solrClassMap.get(type);
    opts.put("class", solrTypeClass);
    solrFieldType = schema.newFieldType(name, solrTypeClass, opts);
    this.testType = type;
  }

  FieldType getFieldType() {
    return solrFieldType;
  }

  String getSolrTypeClass() {
    return solrTypeClass;
  }
}
/**
 * Registry of all test fields, plus categorized views (docValues-only,
 * stored-only, multiValued, etc.) that are populated when the fields are
 * installed into the schema via {@link #addFields}.
 */
class FieldHolder {
  Map<String, RetrieveField> fields = new HashMap<>();

  List<RetrieveField> dvNotStoredFields = new ArrayList<>();
  List<RetrieveField> storedNotDvFields = new ArrayList<>();
  List<RetrieveField> multiValuedFields = new ArrayList<>();
  List<RetrieveField> storedAndDvFields = new ArrayList<>();
  List<RetrieveField> storedMvFields = new ArrayList<>();
  List<RetrieveField> allFields = new ArrayList<>();

  /** Creates and records a test field with the given name, type and options. */
  void addField(IndexSchema schema, String name, String type, Map<String, String> opts) {
    fields.put(name, new RetrieveField(schema, name, type, opts));
  }

  /**
   * Installs every recorded field into the schema, bucketing each one into the
   * category lists as it goes; returns the resulting schema.
   */
  IndexSchema addFields(IndexSchema schema) {
    List<SchemaField> toRegister = new ArrayList<>();
    for (RetrieveField testField : fields.values()) {
      allFields.add(testField);
      SchemaField sf = testField.schemaField;
      toRegister.add(sf);
      boolean stored = sf.stored();
      boolean dv = sf.hasDocValues();
      boolean mv = sf.multiValued();
      if (mv) {
        multiValuedFields.add(testField);
      }
      if (dv && stored == false) {
        dvNotStoredFields.add(testField);
      }
      if (dv == false && stored) {
        storedNotDvFields.add(testField);
      }
      if (dv && stored) {
        storedAndDvFields.add(testField);
      }
      if (stored && mv) {
        storedMvFields.add(testField);
      }
    }
    return schema.addFields(toRegister, Collections.emptyMap(), false);
  }

  /** Looks up a previously-registered test field by name, or null if absent. */
  RetrieveField getTestField(String field) {
    return fields.get(field);
  }
}
/**
 * Wraps a schema field with helpers to generate random values for it, convert
 * values returned by a query to canonical strings, and adjust the expected
 * values for docValues set/sort semantics.
 */
class RetrieveField {
  final String name;
  final String type;
  final SchemaField schemaField;
  final RetrieveFieldType testFieldType;

  RetrieveField(IndexSchema schema, String name, String type, Map<String, String> opts) {
    this.name = name;
    this.type = type;
    // The previous version built a fullOpts copy with name/type added but never
    // used it; newField is passed the caller's opts directly.
    this.schemaField = schema.newField(name, type, opts);
    this.testFieldType = TestRetrieveFieldsOptimizer.typesHolder.getTestType(type);
  }

  /**
   * Converts a value returned in a SolrDocument to the canonical string form
   * used in the expected-values lists.
   */
  String getValAsString(Object val) {
    FieldType fieldType = schemaField.getType();

    //Why do mutliValued date fields get here as Strings whereas single-valued fields are Dates?
    // Why do BoolFields sometimes get here as "F" or "T"?
    if (val instanceof String) {
      if (fieldType instanceof TrieDateField || fieldType instanceof DatePointField) {
        // Dates arrive as epoch-millis strings here; normalize to ISO instant.
        long lVal = Long.parseLong((String) val);
        return (new Date(lVal).toInstant().toString());
      }
      if (fieldType instanceof BoolField) {
        if (val.equals("F")) return "false";
        if (val.equals("T")) return "true";
      }
      return (String) val;
    }
    if (fieldType instanceof TrieDateField || fieldType instanceof DatePointField) {
      return ((Date) val).toInstant().toString();
    }
    return val.toString();
  }

  String getName() {
    return schemaField.getName();
  }

  // Alphabet for random string values.
  static final String chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";

  /** Returns a random 10-character alphanumeric string. */
  private String randString() {
    StringBuilder sb = new StringBuilder();
    for (int idx = 0; idx < 10; ++idx) {
      sb.append(chars.charAt(random().nextInt(chars.length())));
    }
    return sb.toString();
  }

  /** Returns a random ISO-instant date string within [epoch, epoch + 3e12 ms). */
  private String randDate() {
    return new Date(Math.abs(random().nextLong()) % 3_000_000_000_000L).toInstant().toString();
  }

  /**
   * Generates the value(s) to index for this field: always one value, plus up
   * to four more when the field is multiValued, plus sometimes a duplicate.
   */
  List<String> getValsForField() {
    List<String> valsAsStrings = new ArrayList<>();
    switch (testFieldType.getSolrTypeClass()) {
      case "solr.TrieIntField":
      case "solr.TrieLongField":
      case "solr.IntPointField":
      case "solr.LongPointField":
        addRandomVals(valsAsStrings, () -> Integer.toString(random().nextInt(10_000)));
        break;

      case "solr.TrieFloatField":
      case "solr.TrieDoubleField":
      case "solr.FloatPointField":
      case "solr.DoublePointField":
        addRandomVals(valsAsStrings, () -> Float.toString(random().nextFloat()));
        break;

      case "solr.TrieDateField":
      case "solr.DatePointField":
        addRandomVals(valsAsStrings, this::randDate);
        break;

      case "solr.StrField":
        addRandomVals(valsAsStrings, this::randString);
        break;

      case "solr.BoolField":
        addRandomVals(valsAsStrings, () -> Boolean.toString(random().nextBoolean()));
        break;

      default:
        fail("Found no case for field " + name + " type " + type);
        break;
    }
    // There are tricky cases with multiValued fields that are sometimes fetched from docValues that obey set
    // semantics so be sure we include at least one duplicate in a multValued field sometimes
    if (random().nextBoolean() && valsAsStrings.size() > 1) {
      valsAsStrings.add(valsAsStrings.get(random().nextInt(valsAsStrings.size())));
    }
    return valsAsStrings;
  }

  // Adds one generated value; for multiValued fields, adds up to four more.
  // (The loop bound calls random() each iteration, matching the original behavior.)
  private void addRandomVals(List<String> vals, Supplier<String> gen) {
    vals.add(gen.get());
    if (schemaField.multiValued() == false) {
      return;
    }
    for (int idx = 0; idx < random().nextInt(5); ++idx) {
      vals.add(gen.get());
    }
  }

  /**
   * Rewrites {@code valsAsStrings} in place so it matches what a query will
   * actually return for a multiValued, non-stored field: SORTED_SET/SORTED
   * docValues collapse duplicates, and docValues come back ordered, so the
   * expected values are deduped and/or sorted with a type-appropriate comparator.
   */
  void expectedValsAsStrings(final FieldInfo info, List<String> valsAsStrings) {
    if (schemaField.stored() || schemaField.multiValued() == false) {
      return; // stored or single-valued fields come back exactly as indexed
    }
    switch (info.getDocValuesType()) {
      case NONE: // These three types are single values, just return.
      case NUMERIC:
      case BINARY: // here for completeness, really doesn't make sense.
        return;

      case SORTED_NUMERIC: // Can have multiple, identical values. This was a surprise to me.
        break;

      case SORTED_SET: // Obey set semantics.
      case SORTED:
        Set<String> uniq = new TreeSet<>(valsAsStrings);
        valsAsStrings.clear();
        valsAsStrings.addAll(uniq);
        break;
    }
    // Now order them if string-based comparison isn't reasonable
    switch (testFieldType.getSolrTypeClass()) {
      case "solr.TrieIntField":
      case "solr.TrieLongField":
        Collections.sort(valsAsStrings, Comparator.comparingInt(Integer::parseInt));
        break;

      case "solr.IntPointField":
      case "solr.LongPointField":
        Collections.sort(valsAsStrings, Comparator.comparingLong(Long::parseLong));
        break;

      case "solr.TrieFloatField":
      case "solr.FloatPointField":
      case "solr.TrieDoubleField":
      case "solr.DoublePointField":
        Collections.sort(valsAsStrings, Comparator.comparingDouble(Double::parseDouble));
        break;

      case "solr.TrieDateField":
      case "solr.DatePointField":
      case "solr.StrField":
      case "solr.BoolField":
        // ISO dates, strings and "true"/"false" sort correctly lexicographically.
        Collections.sort(valsAsStrings);
        break;

      default:
        fail("Found no case for field " + name + " type " + type);
        break;
    }
  }
}