core/Lucy/Search/Collector/SortCollector.c - lucy - Git at Google

 /* Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #define C_LUCY_SORTCOLLECTOR
 #define C_LUCY_MATCHDOC
 #include "Lucy/Util/ToolSet.h"

 #include "Lucy/Search/Collector/SortCollector.h"
 #include "Lucy/Index/SegReader.h"
 #include "Lucy/Index/SortCache.h"
 #include "Lucy/Index/SortCache/NumericSortCache.h"
 #include "Lucy/Index/SortCache/TextSortCache.h"
 #include "Lucy/Index/SortReader.h"
 #include "Lucy/Plan/FieldType.h"
 #include "Lucy/Plan/Schema.h"
 #include "Lucy/Search/HitQueue.h"
 #include "Lucy/Search/MatchDoc.h"
 #include "Lucy/Search/Matcher.h"
 #include "Lucy/Search/SortRule.h"
 #include "Lucy/Search/SortSpec.h"

 #define COMPARE_BY_SCORE             0x1
 #define COMPARE_BY_SCORE_REV         0x2
 #define COMPARE_BY_DOC_ID            0x3
 #define COMPARE_BY_DOC_ID_REV        0x4
 #define COMPARE_BY_ORD1              0x5
 #define COMPARE_BY_ORD1_REV          0x6
 #define COMPARE_BY_ORD2              0x7
 #define COMPARE_BY_ORD2_REV          0x8
 #define COMPARE_BY_ORD4              0x9
 #define COMPARE_BY_ORD4_REV          0xA
 #define COMPARE_BY_ORD8              0xB
 #define COMPARE_BY_ORD8_REV          0xC
 #define COMPARE_BY_ORD16             0xD
 #define COMPARE_BY_ORD16_REV         0xE
 #define COMPARE_BY_ORD32             0xF
 #define COMPARE_BY_ORD32_REV         0x10
 #define COMPARE_BY_NATIVE_ORD16      0x11
 #define COMPARE_BY_NATIVE_ORD16_REV  0x12
 #define COMPARE_BY_NATIVE_ORD32      0x13
 #define COMPARE_BY_NATIVE_ORD32_REV  0x14
 #define AUTO_ACCEPT                  0x15
 #define AUTO_REJECT                  0x16
 #define AUTO_TIE                     0x17
 #define ACTIONS_MASK                 0x1F

 // Pick an action based on a SortRule and if needed, a SortCache.
 static int8_t
 S_derive_action(SortRule *rule, SortCache *sort_cache);

 // Decide whether a doc should be inserted into the HitQueue.
 static INLINE bool_t
 SI_competitive(SortCollector *self, int32_t doc_id);

 SortCollector*
 SortColl_new(Schema *schema, SortSpec *sort_spec, uint32_t wanted) {
     SortCollector *self = (SortCollector*)VTable_Make_Obj(SORTCOLLECTOR);
     return SortColl_init(self, schema, sort_spec, wanted);
 }

 // Default to sort-by-score-then-doc-id.
 static VArray*
 S_default_sort_rules() {
     VArray *rules = VA_new(1);
     VA_Push(rules, (Obj*)SortRule_new(SortRule_SCORE, NULL, false));
     VA_Push(rules, (Obj*)SortRule_new(SortRule_DOC_ID, NULL, false));
     return rules;
 }

 SortCollector*
 SortColl_init(SortCollector *self, Schema *schema, SortSpec *sort_spec,
               uint32_t wanted) {
     VArray *rules = sort_spec
                     ? (VArray*)INCREF(SortSpec_Get_Rules(sort_spec))
                     : S_default_sort_rules();
     uint32_t num_rules = VA_Get_Size(rules);
     uint32_t i;

     // Validate.
     if (sort_spec && !schema) {
         THROW(ERR, "Can't supply a SortSpec without a Schema.");
     }
     if (!num_rules) {
         THROW(ERR, "Can't supply a SortSpec with no SortRules.");
     }

     // Init.
     Coll_init((Collector*)self);
     self->total_hits    = 0;
     self->bubble_doc    = I32_MAX;
     self->bubble_score  = F32_NEGINF;
     self->seg_doc_max   = 0;

     // Assign.
     self->wanted        = wanted;

     // Derive.
     self->hit_q         = HitQ_new(schema, sort_spec, wanted);
     self->rules         = rules; // absorb refcount.
     self->num_rules     = num_rules;
     self->sort_caches   = (SortCache**)CALLOCATE(num_rules, sizeof(SortCache*));
     self->ord_arrays    = (void**)CALLOCATE(num_rules, sizeof(void*));
     self->actions       = (uint8_t*)CALLOCATE(num_rules, sizeof(uint8_t));

     // Build up an array of "actions" which we will execute during each call
     // to Collect(). Determine whether we need to track scores and field
     // values.
     self->need_score  = false;
     self->need_values = false;
     for (i = 0; i < num_rules; i++) {
         SortRule *rule   = (SortRule*)VA_Fetch(rules, i);
         int32_t rule_type  = SortRule_Get_Type(rule);
         self->actions[i] = S_derive_action(rule, NULL);
         if (rule_type == SortRule_SCORE) {
             self->need_score = true;
         }
         else if (rule_type == SortRule_FIELD) {
             CharBuf *field = SortRule_Get_Field(rule);
             FieldType *type = Schema_Fetch_Type(schema, field);
             if (!type || !FType_Sortable(type)) {
                 THROW(ERR, "'%o' isn't a sortable field", field);
             }
             self->need_values = true;
         }
     }

     // Perform an optimization.  So long as we always collect docs in
     // ascending order, Collect() will favor lower doc numbers -- so we may
     // not need to execute a final COMPARE_BY_DOC_ID action.
     self->num_actions = num_rules;
     if (self->actions[num_rules - 1] == COMPARE_BY_DOC_ID) {
         self->num_actions--;
     }

     // Override our derived actions with an action which will be excecuted
     // autmatically until the queue fills up.
     self->auto_actions    = (uint8_t*)MALLOCATE(1);
     self->auto_actions[0] = wanted ? AUTO_ACCEPT : AUTO_REJECT;
     self->derived_actions = self->actions;
     self->actions         = self->auto_actions;


     // Prepare a MatchDoc-in-waiting.
     {
         VArray *values = self->need_values ? VA_new(num_rules) : NULL;
         float   score  = self->need_score  ? F32_NEGINF : F32_NAN;
         self->bumped = MatchDoc_new(I32_MAX, score, values);
         DECREF(values);
     }

     return self;
 }

 void
 SortColl_destroy(SortCollector *self) {
     DECREF(self->hit_q);
     DECREF(self->rules);
     DECREF(self->bumped);
     FREEMEM(self->sort_caches);
     FREEMEM(self->ord_arrays);
     FREEMEM(self->auto_actions);
     FREEMEM(self->derived_actions);
     SUPER_DESTROY(self, SORTCOLLECTOR);
 }

 static int8_t
 S_derive_action(SortRule *rule, SortCache *cache) {
     int32_t  rule_type = SortRule_Get_Type(rule);
     bool_t reverse   = !!SortRule_Get_Reverse(rule);

     if (rule_type == SortRule_SCORE) {
         return COMPARE_BY_SCORE + reverse;
     }
     else if (rule_type == SortRule_DOC_ID) {
         return COMPARE_BY_DOC_ID + reverse;
     }
     else if (rule_type == SortRule_FIELD) {
         if (cache) {
             int8_t width = SortCache_Get_Ord_Width(cache);
             switch (width) {
                 case 1:  return COMPARE_BY_ORD1  + reverse;
                 case 2:  return COMPARE_BY_ORD2  + reverse;
                 case 4:  return COMPARE_BY_ORD4  + reverse;
                 case 8:  return COMPARE_BY_ORD8  + reverse;
                 case 16:
                     if (SortCache_Get_Native_Ords(cache)) {
                         return COMPARE_BY_NATIVE_ORD16 + reverse;
                     }
                     else {
                         return COMPARE_BY_ORD16 + reverse;
                     }
                 case 32:
                     if (SortCache_Get_Native_Ords(cache)) {
                         return COMPARE_BY_NATIVE_ORD32 + reverse;
                     }
                     else {
                         return COMPARE_BY_ORD32 + reverse;
                     }
                 default: THROW(ERR, "Unknown width: %i8", width);
             }
         }
         else {
             return AUTO_TIE;
         }
     }
     else {
         THROW(ERR, "Unrecognized SortRule type %i32", rule_type);
     }
     UNREACHABLE_RETURN(int8_t);
 }

 void
 SortColl_set_reader(SortCollector *self, SegReader *reader) {
     SortReader *sort_reader
         = (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));

     // Reset threshold variables and trigger auto-action behavior.
     self->bumped->doc_id = I32_MAX;
     self->bubble_doc     = I32_MAX;
     self->bumped->score  = self->need_score ? F32_NEGINF : F32_NAN;
     self->bubble_score   = self->need_score ? F32_NEGINF : F32_NAN;
     self->actions        = self->auto_actions;

     // Obtain sort caches. Derive actions array for this segment.
     if (self->need_values && sort_reader) {
         uint32_t i, max;
         for (i = 0, max = self->num_rules; i < max; i++) {
             SortRule  *rule  = (SortRule*)VA_Fetch(self->rules, i);
             CharBuf   *field = SortRule_Get_Field(rule);
             SortCache *cache = field
                                ? SortReader_Fetch_Sort_Cache(sort_reader, field)
                                : NULL;
             self->sort_caches[i] = cache;
             self->derived_actions[i] = S_derive_action(rule, cache);
             if (cache) { self->ord_arrays[i] = SortCache_Get_Ords(cache); }
             else       { self->ord_arrays[i] = NULL; }
         }
     }
     self->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
     Coll_set_reader((Collector*)self, reader);
 }

 VArray*
 SortColl_pop_match_docs(SortCollector *self) {
     return HitQ_Pop_All(self->hit_q);
 }

 uint32_t
 SortColl_get_total_hits(SortCollector *self) {
     return self->total_hits;
 }

 bool_t
 SortColl_need_score(SortCollector *self) {
     return self->need_score;
 }

 void
 SortColl_collect(SortCollector *self, int32_t doc_id) {
     // Add to the total number of hits.
     self->total_hits++;

     // Collect this hit if it's competitive.
     if (SI_competitive(self, doc_id)) {
         MatchDoc *const match_doc = self->bumped;
         match_doc->doc_id = doc_id + self->base;

         if (self->need_score && match_doc->score == F32_NEGINF) {
             match_doc->score = Matcher_Score(self->matcher);
         }

         // Fetch values so that cross-segment sorting can work.
         if (self->need_values) {
             VArray *values = match_doc->values;
             uint32_t i, max;

             for (i = 0, max = self->num_rules; i < max; i++) {
                 SortCache *cache   = self->sort_caches[i];
                 Obj       *old_val = (Obj*)VA_Delete(values, i);
                 if (cache) {
                     int32_t ord = SortCache_Ordinal(cache, doc_id);
                     Obj *blank = old_val
                                  ? old_val
                                  : SortCache_Make_Blank(cache);
                     Obj *val = SortCache_Value(cache, ord, blank);
                     if (val) { VA_Store(values, i, (Obj*)val); }
                     else     { DECREF(blank); }
                 }
             }
         }

         // Insert the new MatchDoc.
         self->bumped = (MatchDoc*)HitQ_Jostle(self->hit_q, (Obj*)match_doc);

         if (self->bumped) {
             if (self->bumped == match_doc) {
                 /* The queue is full, and we have established a threshold for
                  * this segment as to what sort of document is definitely not
                  * acceptable.  Turn off AUTO_ACCEPT and start actually
                  * testing whether hits are competitive. */
                 self->bubble_score  = match_doc->score;
                 self->bubble_doc    = doc_id;
                 self->actions       = self->derived_actions;
             }

             // Recycle.
             self->bumped->score = self->need_score ? F32_NEGINF : F32_NAN;
         }
         else {
             // The queue isn't full yet, so create a fresh MatchDoc.
             VArray *values = self->need_values
                              ? VA_new(self->num_rules)
                              : NULL;
             float fake_score = self->need_score ? F32_NEGINF : F32_NAN;
             self->bumped = MatchDoc_new(I32_MAX, fake_score, values);
             DECREF(values);
         }

     }
 }

 static INLINE int32_t
 SI_compare_by_ord1(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
     void *const ords = self->ord_arrays[tick];
     int32_t a_ord = NumUtil_u1get(ords, a);
     int32_t b_ord = NumUtil_u1get(ords, b);
     return a_ord - b_ord;
 }
 static INLINE int32_t
 SI_compare_by_ord2(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
     void *const ords = self->ord_arrays[tick];
     int32_t a_ord = NumUtil_u2get(ords, a);
     int32_t b_ord = NumUtil_u2get(ords, b);
     return a_ord - b_ord;
 }
 static INLINE int32_t
 SI_compare_by_ord4(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
     void *const ords = self->ord_arrays[tick];
     int32_t a_ord = NumUtil_u4get(ords, a);
     int32_t b_ord = NumUtil_u4get(ords, b);
     return a_ord - b_ord;
 }
 static INLINE int32_t
 SI_compare_by_ord8(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
     uint8_t *ords = (uint8_t*)self->ord_arrays[tick];
     int32_t a_ord = ords[a];
     int32_t b_ord = ords[b];
     return a_ord - b_ord;
 }
 static INLINE int32_t
 SI_compare_by_ord16(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
     uint8_t *ord_bytes = (uint8_t*)self->ord_arrays[tick];
     uint8_t *address_a = ord_bytes + a * sizeof(uint16_t);
     uint8_t *address_b = ord_bytes + b * sizeof(uint16_t);
     int32_t  ord_a = NumUtil_decode_bigend_u16(address_a);
     int32_t  ord_b = NumUtil_decode_bigend_u16(address_b);
     return ord_a - ord_b;
 }
 static INLINE int32_t
 SI_compare_by_ord32(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
     uint8_t *ord_bytes = (uint8_t*)self->ord_arrays[tick];
     uint8_t *address_a = ord_bytes + a * sizeof(uint32_t);
     uint8_t *address_b = ord_bytes + b * sizeof(uint32_t);
     int32_t  ord_a = NumUtil_decode_bigend_u32(address_a);
     int32_t  ord_b = NumUtil_decode_bigend_u32(address_b);
     return ord_a - ord_b;
 }
 static INLINE int32_t
 SI_compare_by_native_ord16(SortCollector *self, uint32_t tick,
                            int32_t a, int32_t b) {
     uint16_t *ords = (uint16_t*)self->ord_arrays[tick];
     int32_t a_ord = ords[a];
     int32_t b_ord = ords[b];
     return a_ord - b_ord;
 }
 static INLINE int32_t
 SI_compare_by_native_ord32(SortCollector *self, uint32_t tick,
                            int32_t a, int32_t b) {
     int32_t *ords = (int32_t*)self->ord_arrays[tick];
     return ords[a] - ords[b];
 }

 // Bounds checking for doc id against the segment doc_max.  We assume that any
 // sort cache ord arrays can accomodate lookups up to this number.
 static INLINE int32_t
 SI_validate_doc_id(SortCollector *self, int32_t doc_id) {
     // Check as uint32_t since we're using these doc ids as array indexes.
     if ((uint32_t)doc_id > (uint32_t)self->seg_doc_max) {
         THROW(ERR, "Doc ID %i32 greater than doc max %i32", doc_id,
               self->seg_doc_max);
     }
     return doc_id;
 }

 static INLINE bool_t
 SI_competitive(SortCollector *self, int32_t doc_id) {
     /* Ordinarily, we would cache local copies of more member variables in
      * const automatic variables in order to improve code clarity and provide
      * more hints to the compiler about what variables are actually invariant
      * for the duration of this routine:
      *
      *     uint8_t *const actions    = self->actions;
      *     const uint32_t num_rules  = self->num_rules;
      *     const int32_t bubble_doc = self->bubble_doc;
      *
      * However, our major goal is to return as quickly as possible, and the
      * common case is that we'll have our answer before the first loop iter
      * finishes -- so we don't worry about the cost of performing extra
      * dereferencing on subsequent loop iters.
      *
      * The goal of returning quickly also drives the choice of a "do-while"
      * loop instead of a "for" loop, and the switch statement optimized for
      * compilation to a jump table.
      */
     uint8_t *const actions = self->actions;
     uint32_t i = 0;

     // Iterate through our array of actions, returning as quickly as possible.
     do {
         switch (actions[i] & ACTIONS_MASK) {
             case AUTO_ACCEPT:
                 return true;
             case AUTO_REJECT:
                 return false;
             case AUTO_TIE:
                 break;
             case COMPARE_BY_SCORE: {
                     float score = Matcher_Score(self->matcher);
                     if (*(int32_t*)&score == *(int32_t*)&self->bubble_score) {
                         break;
                     }
                     if (score > self->bubble_score) {
                         self->bumped->score = score;
                         return true;
                     }
                     else if (score < self->bubble_score) {
                         return false;
                     }
                 }
                 break;
             case COMPARE_BY_SCORE_REV: {
                     float score = Matcher_Score(self->matcher);
                     if (*(int32_t*)&score == *(int32_t*)&self->bubble_score) {
                         break;
                     }
                     if (score < self->bubble_score) {
                         self->bumped->score = score;
                         return true;
                     }
                     else if (score > self->bubble_score) {
                         return false;
                     }
                 }
                 break;
             case COMPARE_BY_DOC_ID:
                 if (doc_id > self->bubble_doc)      { return false; }
                 else if (doc_id < self->bubble_doc) { return true; }
                 break;
             case COMPARE_BY_DOC_ID_REV:
                 if (doc_id > self->bubble_doc)      { return true; }
                 else if (doc_id < self->bubble_doc) { return false; }
                 break;
             case COMPARE_BY_ORD1: {
                     int32_t comparison
                         = SI_compare_by_ord1(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD1_REV: {
                     int32_t comparison
                         = SI_compare_by_ord1(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD2: {
                     int32_t comparison
                         = SI_compare_by_ord2(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD2_REV: {
                     int32_t comparison
                         = SI_compare_by_ord2(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD4: {
                     int32_t comparison
                         = SI_compare_by_ord4(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD4_REV: {
                     int32_t comparison
                         = SI_compare_by_ord4(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD8: {
                     int32_t comparison
                         = SI_compare_by_ord8(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD8_REV: {
                     int32_t comparison
                         = SI_compare_by_ord8(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD16: {
                     int32_t comparison
                         = SI_compare_by_ord16(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD16_REV: {
                     int32_t comparison
                         = SI_compare_by_ord16(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD32: {
                     int32_t comparison
                         = SI_compare_by_ord32(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_ORD32_REV: {
                     int32_t comparison
                         = SI_compare_by_ord32(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_NATIVE_ORD16: {
                     int32_t comparison
                         = SI_compare_by_native_ord16(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_NATIVE_ORD16_REV: {
                     int32_t comparison
                         = SI_compare_by_native_ord16(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_NATIVE_ORD32: {
                     int32_t comparison
                         = SI_compare_by_native_ord32(
                               self, i, SI_validate_doc_id(self, doc_id),
                               self->bubble_doc);
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             case COMPARE_BY_NATIVE_ORD32_REV: {
                     int32_t comparison
                         = SI_compare_by_native_ord32(
                               self, i, self->bubble_doc,
                               SI_validate_doc_id(self, doc_id));
                     if (comparison < 0)      { return true; }
                     else if (comparison > 0) { return false; }
                 }
                 break;
             default:
                 THROW(ERR, "UNEXPECTED action %u8", actions[i]);
         }
     } while (++i < self->num_actions);

     // If we've made it this far and we're still tied, reject the doc so that
     // we prefer items already in the queue.  This has the effect of
     // implicitly breaking ties by doc num, since docs are collected in order.
     return false;
 }
	/* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#define C_LUCY_SORTCOLLECTOR
	#define C_LUCY_MATCHDOC
	#include "Lucy/Util/ToolSet.h"

	#include "Lucy/Search/Collector/SortCollector.h"
	#include "Lucy/Index/SegReader.h"
	#include "Lucy/Index/SortCache.h"
	#include "Lucy/Index/SortCache/NumericSortCache.h"
	#include "Lucy/Index/SortCache/TextSortCache.h"
	#include "Lucy/Index/SortReader.h"
	#include "Lucy/Plan/FieldType.h"
	#include "Lucy/Plan/Schema.h"
	#include "Lucy/Search/HitQueue.h"
	#include "Lucy/Search/MatchDoc.h"
	#include "Lucy/Search/Matcher.h"
	#include "Lucy/Search/SortRule.h"
	#include "Lucy/Search/SortSpec.h"

	#define COMPARE_BY_SCORE 0x1
	#define COMPARE_BY_SCORE_REV 0x2
	#define COMPARE_BY_DOC_ID 0x3
	#define COMPARE_BY_DOC_ID_REV 0x4
	#define COMPARE_BY_ORD1 0x5
	#define COMPARE_BY_ORD1_REV 0x6
	#define COMPARE_BY_ORD2 0x7
	#define COMPARE_BY_ORD2_REV 0x8
	#define COMPARE_BY_ORD4 0x9
	#define COMPARE_BY_ORD4_REV 0xA
	#define COMPARE_BY_ORD8 0xB
	#define COMPARE_BY_ORD8_REV 0xC
	#define COMPARE_BY_ORD16 0xD
	#define COMPARE_BY_ORD16_REV 0xE
	#define COMPARE_BY_ORD32 0xF
	#define COMPARE_BY_ORD32_REV 0x10
	#define COMPARE_BY_NATIVE_ORD16 0x11
	#define COMPARE_BY_NATIVE_ORD16_REV 0x12
	#define COMPARE_BY_NATIVE_ORD32 0x13
	#define COMPARE_BY_NATIVE_ORD32_REV 0x14
	#define AUTO_ACCEPT 0x15
	#define AUTO_REJECT 0x16
	#define AUTO_TIE 0x17
	#define ACTIONS_MASK 0x1F

	// Pick an action based on a SortRule and if needed, a SortCache.
	static int8_t
	S_derive_action(SortRule rule, SortCache sort_cache);

	// Decide whether a doc should be inserted into the HitQueue.
	static INLINE bool_t
	SI_competitive(SortCollector *self, int32_t doc_id);

	SortCollector*
	SortColl_new(Schema schema, SortSpec sort_spec, uint32_t wanted) {
	SortCollector self = (SortCollector)VTable_Make_Obj(SORTCOLLECTOR);
	return SortColl_init(self, schema, sort_spec, wanted);
	}

	// Default to sort-by-score-then-doc-id.
	static VArray*
	S_default_sort_rules() {
	VArray *rules = VA_new(1);
	VA_Push(rules, (Obj*)SortRule_new(SortRule_SCORE, NULL, false));
	VA_Push(rules, (Obj*)SortRule_new(SortRule_DOC_ID, NULL, false));
	return rules;
	}

	SortCollector*
	SortColl_init(SortCollector self, Schema schema, SortSpec *sort_spec,
	uint32_t wanted) {
	VArray *rules = sort_spec
	? (VArray*)INCREF(SortSpec_Get_Rules(sort_spec))
	: S_default_sort_rules();
	uint32_t num_rules = VA_Get_Size(rules);
	uint32_t i;

	// Validate.
	if (sort_spec && !schema) {
	THROW(ERR, "Can't supply a SortSpec without a Schema.");
	}
	if (!num_rules) {
	THROW(ERR, "Can't supply a SortSpec with no SortRules.");
	}

	// Init.
	Coll_init((Collector*)self);
	self->total_hits = 0;
	self->bubble_doc = I32_MAX;
	self->bubble_score = F32_NEGINF;
	self->seg_doc_max = 0;

	// Assign.
	self->wanted = wanted;

	// Derive.
	self->hit_q = HitQ_new(schema, sort_spec, wanted);
	self->rules = rules; // absorb refcount.
	self->num_rules = num_rules;
	self->sort_caches = (SortCache*)CALLOCATE(num_rules, sizeof(SortCache));
	self->ord_arrays = (void*)CALLOCATE(num_rules, sizeof(void));
	self->actions = (uint8_t*)CALLOCATE(num_rules, sizeof(uint8_t));

	// Build up an array of "actions" which we will execute during each call
	// to Collect(). Determine whether we need to track scores and field
	// values.
	self->need_score = false;
	self->need_values = false;
	for (i = 0; i < num_rules; i++) {
	SortRule rule = (SortRule)VA_Fetch(rules, i);
	int32_t rule_type = SortRule_Get_Type(rule);
	self->actions[i] = S_derive_action(rule, NULL);
	if (rule_type == SortRule_SCORE) {
	self->need_score = true;
	}
	else if (rule_type == SortRule_FIELD) {
	CharBuf *field = SortRule_Get_Field(rule);
	FieldType *type = Schema_Fetch_Type(schema, field);
	if (!type \|\| !FType_Sortable(type)) {
	THROW(ERR, "'%o' isn't a sortable field", field);
	}
	self->need_values = true;
	}
	}

	// Perform an optimization. So long as we always collect docs in
	// ascending order, Collect() will favor lower doc numbers -- so we may
	// not need to execute a final COMPARE_BY_DOC_ID action.
	self->num_actions = num_rules;
	if (self->actions[num_rules - 1] == COMPARE_BY_DOC_ID) {
	self->num_actions--;
	}

	// Override our derived actions with an action which will be excecuted
	// autmatically until the queue fills up.
	self->auto_actions = (uint8_t*)MALLOCATE(1);
	self->auto_actions[0] = wanted ? AUTO_ACCEPT : AUTO_REJECT;
	self->derived_actions = self->actions;
	self->actions = self->auto_actions;


	// Prepare a MatchDoc-in-waiting.
	{
	VArray *values = self->need_values ? VA_new(num_rules) : NULL;
	float score = self->need_score ? F32_NEGINF : F32_NAN;
	self->bumped = MatchDoc_new(I32_MAX, score, values);
	DECREF(values);
	}

	return self;
	}

	void
	SortColl_destroy(SortCollector *self) {
	DECREF(self->hit_q);
	DECREF(self->rules);
	DECREF(self->bumped);
	FREEMEM(self->sort_caches);
	FREEMEM(self->ord_arrays);
	FREEMEM(self->auto_actions);
	FREEMEM(self->derived_actions);
	SUPER_DESTROY(self, SORTCOLLECTOR);
	}

	static int8_t
	S_derive_action(SortRule rule, SortCache cache) {
	int32_t rule_type = SortRule_Get_Type(rule);
	bool_t reverse = !!SortRule_Get_Reverse(rule);

	if (rule_type == SortRule_SCORE) {
	return COMPARE_BY_SCORE + reverse;
	}
	else if (rule_type == SortRule_DOC_ID) {
	return COMPARE_BY_DOC_ID + reverse;
	}
	else if (rule_type == SortRule_FIELD) {
	if (cache) {
	int8_t width = SortCache_Get_Ord_Width(cache);
	switch (width) {
	case 1: return COMPARE_BY_ORD1 + reverse;
	case 2: return COMPARE_BY_ORD2 + reverse;
	case 4: return COMPARE_BY_ORD4 + reverse;
	case 8: return COMPARE_BY_ORD8 + reverse;
	case 16:
	if (SortCache_Get_Native_Ords(cache)) {
	return COMPARE_BY_NATIVE_ORD16 + reverse;
	}
	else {
	return COMPARE_BY_ORD16 + reverse;
	}
	case 32:
	if (SortCache_Get_Native_Ords(cache)) {
	return COMPARE_BY_NATIVE_ORD32 + reverse;
	}
	else {
	return COMPARE_BY_ORD32 + reverse;
	}
	default: THROW(ERR, "Unknown width: %i8", width);
	}
	}
	else {
	return AUTO_TIE;
	}
	}
	else {
	THROW(ERR, "Unrecognized SortRule type %i32", rule_type);
	}
	UNREACHABLE_RETURN(int8_t);
	}

	void
	SortColl_set_reader(SortCollector self, SegReader reader) {
	SortReader *sort_reader
	= (SortReader*)SegReader_Fetch(reader, VTable_Get_Name(SORTREADER));

	// Reset threshold variables and trigger auto-action behavior.
	self->bumped->doc_id = I32_MAX;
	self->bubble_doc = I32_MAX;
	self->bumped->score = self->need_score ? F32_NEGINF : F32_NAN;
	self->bubble_score = self->need_score ? F32_NEGINF : F32_NAN;
	self->actions = self->auto_actions;

	// Obtain sort caches. Derive actions array for this segment.
	if (self->need_values && sort_reader) {
	uint32_t i, max;
	for (i = 0, max = self->num_rules; i < max; i++) {
	SortRule rule = (SortRule)VA_Fetch(self->rules, i);
	CharBuf *field = SortRule_Get_Field(rule);
	SortCache *cache = field
	? SortReader_Fetch_Sort_Cache(sort_reader, field)
	: NULL;
	self->sort_caches[i] = cache;
	self->derived_actions[i] = S_derive_action(rule, cache);
	if (cache) { self->ord_arrays[i] = SortCache_Get_Ords(cache); }
	else { self->ord_arrays[i] = NULL; }
	}
	}
	self->seg_doc_max = reader ? SegReader_Doc_Max(reader) : 0;
	Coll_set_reader((Collector*)self, reader);
	}

	VArray*
	SortColl_pop_match_docs(SortCollector *self) {
	return HitQ_Pop_All(self->hit_q);
	}

	uint32_t
	SortColl_get_total_hits(SortCollector *self) {
	return self->total_hits;
	}

	bool_t
	SortColl_need_score(SortCollector *self) {
	return self->need_score;
	}

	void
	SortColl_collect(SortCollector *self, int32_t doc_id) {
	// Add to the total number of hits.
	self->total_hits++;

	// Collect this hit if it's competitive.
	if (SI_competitive(self, doc_id)) {
	MatchDoc *const match_doc = self->bumped;
	match_doc->doc_id = doc_id + self->base;

	if (self->need_score && match_doc->score == F32_NEGINF) {
	match_doc->score = Matcher_Score(self->matcher);
	}

	// Fetch values so that cross-segment sorting can work.
	if (self->need_values) {
	VArray *values = match_doc->values;
	uint32_t i, max;

	for (i = 0, max = self->num_rules; i < max; i++) {
	SortCache *cache = self->sort_caches[i];
	Obj old_val = (Obj)VA_Delete(values, i);
	if (cache) {
	int32_t ord = SortCache_Ordinal(cache, doc_id);
	Obj *blank = old_val
	? old_val
	: SortCache_Make_Blank(cache);
	Obj *val = SortCache_Value(cache, ord, blank);
	if (val) { VA_Store(values, i, (Obj*)val); }
	else { DECREF(blank); }
	}
	}
	}

	// Insert the new MatchDoc.
	self->bumped = (MatchDoc)HitQ_Jostle(self->hit_q, (Obj)match_doc);

	if (self->bumped) {
	if (self->bumped == match_doc) {
	/* The queue is full, and we have established a threshold for
	* this segment as to what sort of document is definitely not
	* acceptable. Turn off AUTO_ACCEPT and start actually
	* testing whether hits are competitive. */
	self->bubble_score = match_doc->score;
	self->bubble_doc = doc_id;
	self->actions = self->derived_actions;
	}

	// Recycle.
	self->bumped->score = self->need_score ? F32_NEGINF : F32_NAN;
	}
	else {
	// The queue isn't full yet, so create a fresh MatchDoc.
	VArray *values = self->need_values
	? VA_new(self->num_rules)
	: NULL;
	float fake_score = self->need_score ? F32_NEGINF : F32_NAN;
	self->bumped = MatchDoc_new(I32_MAX, fake_score, values);
	DECREF(values);
	}

	}
	}

	static INLINE int32_t
	SI_compare_by_ord1(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
	void *const ords = self->ord_arrays[tick];
	int32_t a_ord = NumUtil_u1get(ords, a);
	int32_t b_ord = NumUtil_u1get(ords, b);
	return a_ord - b_ord;
	}
	static INLINE int32_t
	SI_compare_by_ord2(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
	void *const ords = self->ord_arrays[tick];
	int32_t a_ord = NumUtil_u2get(ords, a);
	int32_t b_ord = NumUtil_u2get(ords, b);
	return a_ord - b_ord;
	}
	static INLINE int32_t
	SI_compare_by_ord4(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
	void *const ords = self->ord_arrays[tick];
	int32_t a_ord = NumUtil_u4get(ords, a);
	int32_t b_ord = NumUtil_u4get(ords, b);
	return a_ord - b_ord;
	}
	static INLINE int32_t
	SI_compare_by_ord8(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
	uint8_t ords = (uint8_t)self->ord_arrays[tick];
	int32_t a_ord = ords[a];
	int32_t b_ord = ords[b];
	return a_ord - b_ord;
	}
	static INLINE int32_t
	SI_compare_by_ord16(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
	uint8_t ord_bytes = (uint8_t)self->ord_arrays[tick];
	uint8_t address_a = ord_bytes + a sizeof(uint16_t);
	uint8_t address_b = ord_bytes + b sizeof(uint16_t);
	int32_t ord_a = NumUtil_decode_bigend_u16(address_a);
	int32_t ord_b = NumUtil_decode_bigend_u16(address_b);
	return ord_a - ord_b;
	}
	static INLINE int32_t
	SI_compare_by_ord32(SortCollector *self, uint32_t tick, int32_t a, int32_t b) {
	uint8_t ord_bytes = (uint8_t)self->ord_arrays[tick];
	uint8_t address_a = ord_bytes + a sizeof(uint32_t);
	uint8_t address_b = ord_bytes + b sizeof(uint32_t);
	int32_t ord_a = NumUtil_decode_bigend_u32(address_a);
	int32_t ord_b = NumUtil_decode_bigend_u32(address_b);
	return ord_a - ord_b;
	}
	static INLINE int32_t
	SI_compare_by_native_ord16(SortCollector *self, uint32_t tick,
	int32_t a, int32_t b) {
	uint16_t ords = (uint16_t)self->ord_arrays[tick];
	int32_t a_ord = ords[a];
	int32_t b_ord = ords[b];
	return a_ord - b_ord;
	}
	static INLINE int32_t
	SI_compare_by_native_ord32(SortCollector *self, uint32_t tick,
	int32_t a, int32_t b) {
	int32_t ords = (int32_t)self->ord_arrays[tick];
	return ords[a] - ords[b];
	}

	// Bounds checking for doc id against the segment doc_max. We assume that any
	// sort cache ord arrays can accomodate lookups up to this number.
	static INLINE int32_t
	SI_validate_doc_id(SortCollector *self, int32_t doc_id) {
	// Check as uint32_t since we're using these doc ids as array indexes.
	if ((uint32_t)doc_id > (uint32_t)self->seg_doc_max) {
	THROW(ERR, "Doc ID %i32 greater than doc max %i32", doc_id,
	self->seg_doc_max);
	}
	return doc_id;
	}

	static INLINE bool_t
	SI_competitive(SortCollector *self, int32_t doc_id) {
	/* Ordinarily, we would cache local copies of more member variables in
	* const automatic variables in order to improve code clarity and provide
	* more hints to the compiler about what variables are actually invariant
	* for the duration of this routine:
	*
	* uint8_t *const actions = self->actions;
	* const uint32_t num_rules = self->num_rules;
	* const int32_t bubble_doc = self->bubble_doc;
	*
	* However, our major goal is to return as quickly as possible, and the
	* common case is that we'll have our answer before the first loop iter
	* finishes -- so we don't worry about the cost of performing extra
	* dereferencing on subsequent loop iters.
	*
	* The goal of returning quickly also drives the choice of a "do-while"
	* loop instead of a "for" loop, and the switch statement optimized for
	* compilation to a jump table.
	*/
	uint8_t *const actions = self->actions;
	uint32_t i = 0;

	// Iterate through our array of actions, returning as quickly as possible.
	do {
	switch (actions[i] & ACTIONS_MASK) {
	case AUTO_ACCEPT:
	return true;
	case AUTO_REJECT:
	return false;
	case AUTO_TIE:
	break;
	case COMPARE_BY_SCORE: {
	float score = Matcher_Score(self->matcher);
	if ((int32_t)&score == (int32_t)&self->bubble_score) {
	break;
	}
	if (score > self->bubble_score) {
	self->bumped->score = score;
	return true;
	}
	else if (score < self->bubble_score) {
	return false;
	}
	}
	break;
	case COMPARE_BY_SCORE_REV: {
	float score = Matcher_Score(self->matcher);
	if ((int32_t)&score == (int32_t)&self->bubble_score) {
	break;
	}
	if (score < self->bubble_score) {
	self->bumped->score = score;
	return true;
	}
	else if (score > self->bubble_score) {
	return false;
	}
	}
	break;
	case COMPARE_BY_DOC_ID:
	if (doc_id > self->bubble_doc) { return false; }
	else if (doc_id < self->bubble_doc) { return true; }
	break;
	case COMPARE_BY_DOC_ID_REV:
	if (doc_id > self->bubble_doc) { return true; }
	else if (doc_id < self->bubble_doc) { return false; }
	break;
	case COMPARE_BY_ORD1: {
	int32_t comparison
	= SI_compare_by_ord1(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD1_REV: {
	int32_t comparison
	= SI_compare_by_ord1(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD2: {
	int32_t comparison
	= SI_compare_by_ord2(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD2_REV: {
	int32_t comparison
	= SI_compare_by_ord2(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD4: {
	int32_t comparison
	= SI_compare_by_ord4(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD4_REV: {
	int32_t comparison
	= SI_compare_by_ord4(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD8: {
	int32_t comparison
	= SI_compare_by_ord8(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD8_REV: {
	int32_t comparison
	= SI_compare_by_ord8(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD16: {
	int32_t comparison
	= SI_compare_by_ord16(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD16_REV: {
	int32_t comparison
	= SI_compare_by_ord16(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD32: {
	int32_t comparison
	= SI_compare_by_ord32(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_ORD32_REV: {
	int32_t comparison
	= SI_compare_by_ord32(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_NATIVE_ORD16: {
	int32_t comparison
	= SI_compare_by_native_ord16(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_NATIVE_ORD16_REV: {
	int32_t comparison
	= SI_compare_by_native_ord16(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_NATIVE_ORD32: {
	int32_t comparison
	= SI_compare_by_native_ord32(
	self, i, SI_validate_doc_id(self, doc_id),
	self->bubble_doc);
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	case COMPARE_BY_NATIVE_ORD32_REV: {
	int32_t comparison
	= SI_compare_by_native_ord32(
	self, i, self->bubble_doc,
	SI_validate_doc_id(self, doc_id));
	if (comparison < 0) { return true; }
	else if (comparison > 0) { return false; }
	}
	break;
	default:
	THROW(ERR, "UNEXPECTED action %u8", actions[i]);
	}
	} while (++i < self->num_actions);

	// If we've made it this far and we're still tied, reject the doc so that
	// we prefer items already in the queue. This has the effect of
	// implicitly breaking ties by doc num, since docs are collected in order.
	return false;
	}