blob: 148f46e25581e4a2d40353a9f2517b8d524bd22b [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_SORTUTILS
#define LUCY_USE_SHORT_NAMES
#define CHY_USE_SHORT_NAMES
#include <string.h>
#include "Lucy/Util/SortUtils.h"
#include "Lucy/Object/Err.h"
// Define four-byte and eight-byte types so that we can dereference void
// pointers like integer pointers. The only significance of using int32_t and
// int64_t is that they are 4 and 8 bytes.
#define FOUR_BYTE_TYPE int32_t
#define EIGHT_BYTE_TYPE int64_t
/***************************** mergesort ************************************/
// Recursive merge sorting functions.
static void
S_msort4(void *velems, void *vscratch, uint32_t left, uint32_t right,
lucy_Sort_compare_t compare, void *context);
static void
S_msort8(void *velems, void *vscratch, uint32_t left, uint32_t right,
lucy_Sort_compare_t compare, void *context);
static void
S_msort_any(void *velems, void *vscratch, uint32_t left, uint32_t right,
lucy_Sort_compare_t compare, void *context, size_t width);
static INLINE void
SI_merge(void *left_vptr, uint32_t left_size,
void *right_vptr, uint32_t right_size,
void *vdest, size_t width, lucy_Sort_compare_t compare, void *context);
void
Sort_mergesort(void *elems, void *scratch, uint32_t num_elems, uint32_t width,
lucy_Sort_compare_t compare, void *context) {
// Arrays of 0 or 1 items are already sorted.
if (num_elems < 2) { return; }
// Validate.
if (num_elems >= I32_MAX) {
THROW(ERR, "Provided %u64 elems, but can't handle more than %i32",
(uint64_t)num_elems, I32_MAX);
}
// Dispatch by element size.
switch (width) {
case 0:
THROW(ERR, "Parameter 'width' cannot be 0");
break;
case 4:
S_msort4(elems, scratch, 0, num_elems - 1, compare, context);
break;
case 8:
S_msort8(elems, scratch, 0, num_elems - 1, compare, context);
break;
default:
S_msort_any(elems, scratch, 0, num_elems - 1, compare,
context, width);
break;
}
}
void
Sort_merge(void *left_ptr, uint32_t left_size,
void *right_ptr, uint32_t right_size,
void *dest, size_t width, lucy_Sort_compare_t compare,
void *context) {
switch (width) {
case 0:
THROW(ERR, "Parameter 'width' cannot be 0");
break;
case 4:
SI_merge(left_ptr, left_size, right_ptr, right_size,
dest, 4, compare, context);
break;
case 8:
SI_merge(left_ptr, left_size, right_ptr, right_size,
dest, 8, compare, context);
break;
default:
SI_merge(left_ptr, left_size, right_ptr, right_size,
dest, width, compare, context);
break;
}
}
#define WIDTH 4
static void
S_msort4(void *velems, void *vscratch, uint32_t left, uint32_t right,
lucy_Sort_compare_t compare, void *context) {
uint8_t *elems = (uint8_t*)velems;
uint8_t *scratch = (uint8_t*)vscratch;
if (right > left) {
const uint32_t mid = ((right + left) / 2) + 1;
S_msort4(elems, scratch, left, mid - 1, compare, context);
S_msort4(elems, scratch, mid, right, compare, context);
SI_merge((elems + left * WIDTH), (mid - left),
(elems + mid * WIDTH), (right - mid + 1),
scratch, WIDTH, compare, context);
memcpy((elems + left * WIDTH), scratch, ((right - left + 1) * WIDTH));
}
}
#undef WIDTH
#define WIDTH 8
static void
S_msort8(void *velems, void *vscratch, uint32_t left, uint32_t right,
lucy_Sort_compare_t compare, void *context) {
uint8_t *elems = (uint8_t*)velems;
uint8_t *scratch = (uint8_t*)vscratch;
if (right > left) {
const uint32_t mid = ((right + left) / 2) + 1;
S_msort8(elems, scratch, left, mid - 1, compare, context);
S_msort8(elems, scratch, mid, right, compare, context);
SI_merge((elems + left * WIDTH), (mid - left),
(elems + mid * WIDTH), (right - mid + 1),
scratch, WIDTH, compare, context);
memcpy((elems + left * WIDTH), scratch, ((right - left + 1) * WIDTH));
}
}
#undef WIDTH
static void
S_msort_any(void *velems, void *vscratch, uint32_t left, uint32_t right,
lucy_Sort_compare_t compare, void *context, size_t width) {
uint8_t *elems = (uint8_t*)velems;
uint8_t *scratch = (uint8_t*)vscratch;
if (right > left) {
const uint32_t mid = ((right + left) / 2) + 1;
S_msort_any(elems, scratch, left, mid - 1, compare, context, width);
S_msort_any(elems, scratch, mid, right, compare, context, width);
SI_merge((elems + left * width), (mid - left),
(elems + mid * width), (right - mid + 1),
scratch, width, compare, context);
memcpy((elems + left * width), scratch, ((right - left + 1) * width));
}
}
static INLINE void
SI_merge(void *left_vptr, uint32_t left_size,
void *right_vptr, uint32_t right_size,
void *vdest, size_t width, lucy_Sort_compare_t compare,
void *context) {
uint8_t *left_ptr = (uint8_t*)left_vptr;
uint8_t *right_ptr = (uint8_t*)right_vptr;
uint8_t *left_limit = left_ptr + left_size * width;
uint8_t *right_limit = right_ptr + right_size * width;
uint8_t *dest = (uint8_t*)vdest;
while (left_ptr < left_limit && right_ptr < right_limit) {
if (compare(context, left_ptr, right_ptr) < 1) {
memcpy(dest, left_ptr, width);
dest += width;
left_ptr += width;
}
else {
memcpy(dest, right_ptr, width);
dest += width;
right_ptr += width;
}
}
const size_t left_remaining = left_limit - left_ptr;
memcpy(dest, left_ptr, left_remaining);
dest += left_remaining;
const size_t right_remaining = right_limit - right_ptr;
memcpy(dest, right_ptr, right_remaining);
}
/***************************** quicksort ************************************/
// Quicksort implementations optimized for four-byte and eight-byte elements.
static void
S_qsort4(FOUR_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context);
static void
S_qsort8(EIGHT_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context);
// Swap two elements.
static INLINE void
SI_exchange4(FOUR_BYTE_TYPE *elems, int32_t left, int32_t right);
static INLINE void
SI_exchange8(EIGHT_BYTE_TYPE *elems, int32_t left, int32_t right);
/* Select a pivot by choosing the median of three values, guarding against
* the worst-case behavior of quicksort. Place the pivot in the rightmost
* slot.
*
* Possible states:
*
* abc => abc => abc => acb
* acb => acb => acb => acb
* bac => abc => abc => acb
* bca => bca => acb => acb
* cba => bca => acb => acb
* cab => acb => acb => acb
* aab => aab => aab => aba
* aba => aba => aba => aba
* baa => aba => aba => aba
* bba => bba => abb => abb
* bab => abb => abb => abb
* abb => abb => abb => abb
* aaa => aaa => aaa => aaa
*/
static INLINE FOUR_BYTE_TYPE*
SI_choose_pivot4(FOUR_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context);
static INLINE EIGHT_BYTE_TYPE*
SI_choose_pivot8(EIGHT_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context);
void
Sort_quicksort(void *elems, size_t num_elems, size_t width,
lucy_Sort_compare_t compare, void *context) {
// Arrays of 0 or 1 items are already sorted.
if (num_elems < 2) { return; }
// Validate.
if (num_elems >= I32_MAX) {
THROW(ERR, "Provided %u64 elems, but can't handle more than %i32",
(uint64_t)num_elems, I32_MAX);
}
if (width == 4) {
S_qsort4((FOUR_BYTE_TYPE*)elems, 0, num_elems - 1, compare, context);
}
else if (width == 8) {
S_qsort8((EIGHT_BYTE_TYPE*)elems, 0, num_elems - 1, compare, context);
}
else {
THROW(ERR, "Unsupported width: %i64", (int64_t)width);
}
}
/************************* quicksort 4 byte *********************************/
static INLINE void
SI_exchange4(FOUR_BYTE_TYPE *elems, int32_t left, int32_t right) {
FOUR_BYTE_TYPE saved = elems[left];
elems[left] = elems[right];
elems[right] = saved;
}
static INLINE FOUR_BYTE_TYPE*
SI_choose_pivot4(FOUR_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context) {
if (right - left > 1) {
int32_t mid = left + (right - left) / 2;
if (compare(context, elems + left, elems + mid) > 0) {
SI_exchange4(elems, left, mid);
}
if (compare(context, elems + left, elems + right) > 0) {
SI_exchange4(elems, left, right);
}
if (compare(context, elems + right, elems + mid) > 0) {
SI_exchange4(elems, right, mid);
}
}
return elems + right;
}
static void
S_qsort4(FOUR_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context) {
FOUR_BYTE_TYPE *const pivot
= SI_choose_pivot4(elems, left, right, compare, context);
int32_t i = left - 1;
int32_t j = right;
int32_t p = left - 1;
int32_t q = right;
if (right <= left) { return; }
/* TODO: A standard optimization for quicksort is to fall back to an
* insertion sort when the the number of elements to be sorted becomes
* small enough. */
while (1) {
int comparison1;
int comparison2;
// Find an element from the left that is greater than or equal to the
// pivot (i.e. that should move to the right).
while (1) {
i++;
comparison1 = compare(context, elems + i, pivot);
if (comparison1 >= 0) { break; }
}
// Find an element from the right that is less than or equal to the
// pivot (i.e. that should move to the left).
while (1) {
j--;
comparison2 = compare(context, elems + j, pivot);
if (comparison2 <= 0) { break; }
if (j == left) { break; }
}
// Bail out of loop when we meet in the middle.
if (i >= j) { break; }
// Swap the elements we found, so the lesser element moves left and
// the greater element moves right.
SI_exchange4(elems, i, j);
// Move any elements which test as "equal" to the pivot to the outside
// edges of the array.
if (comparison2 == 0) {
p++;
SI_exchange4(elems, p, i);
}
if (comparison1 == 0) {
q--;
SI_exchange4(elems, j, q);
}
}
/* Move "equal" elements from the outside edges to the center.
*
* Before:
*
* equal | less_than | greater_than | equal
*
* After:
*
* less_than | equal | greater_than
*/
SI_exchange4(elems, i, right);
j = i - 1;
i++;
for (int32_t k = left; k < p; k++, j--) { SI_exchange4(elems, k, j); }
for (int32_t k = right - 1; k > q; k--, i++) { SI_exchange4(elems, i, k); }
// Recurse.
S_qsort4(elems, left, j, compare, context); // Sort less_than.
S_qsort4(elems, i, right, compare, context); // Sort greater_than.
}
/************************* quicksort 8 byte *********************************/
static INLINE void
SI_exchange8(EIGHT_BYTE_TYPE *elems, int32_t left, int32_t right) {
EIGHT_BYTE_TYPE saved = elems[left];
elems[left] = elems[right];
elems[right] = saved;
}
static INLINE EIGHT_BYTE_TYPE*
SI_choose_pivot8(EIGHT_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context) {
if (right - left > 1) {
int32_t mid = left + (right - left) / 2;
if (compare(context, elems + left, elems + mid) > 0) {
SI_exchange8(elems, left, mid);
}
if (compare(context, elems + left, elems + right) > 0) {
SI_exchange8(elems, left, right);
}
if (compare(context, elems + right, elems + mid) > 0) {
SI_exchange8(elems, right, mid);
}
}
return elems + right;
}
static void
S_qsort8(EIGHT_BYTE_TYPE *elems, int32_t left, int32_t right,
lucy_Sort_compare_t compare, void *context) {
EIGHT_BYTE_TYPE *const pivot
= SI_choose_pivot8(elems, left, right, compare, context);
int32_t i = left - 1;
int32_t j = right;
int32_t p = left - 1;
int32_t q = right;
if (right <= left) { return; }
/* TODO: A standard optimization for quicksort is to fall back to an
* insertion sort when the the number of elements to be sorted becomes
* small enough. */
while (1) {
int comparison1;
int comparison2;
// Find an element from the left that is greater than or equal to the
// pivot (i.e. that should move to the right).
while (1) {
i++;
comparison1 = compare(context, elems + i, pivot);
if (comparison1 >= 0) { break; }
}
// Find an element from the right that is less than or equal to the
// pivot (i.e. that should move to the left).
while (1) {
j--;
comparison2 = compare(context, elems + j, pivot);
if (comparison2 <= 0) { break; }
if (j == left) { break; }
}
// Bail out of loop when we meet in the middle.
if (i >= j) { break; }
// Swap the elements we found, so the lesser element moves left and
// the greater element moves right.
SI_exchange8(elems, i, j);
// Move any elements which test as "equal" to the pivot to the outside
// edges of the array.
if (comparison2 == 0) {
p++;
SI_exchange8(elems, p, i);
}
if (comparison1 == 0) {
q--;
SI_exchange8(elems, j, q);
}
}
/* Move "equal" elements from the outside edges to the center.
*
* Before:
*
* equal | less_than | greater_than | equal
*
* After:
*
* less_than | equal | greater_than
*/
int32_t k;
SI_exchange8(elems, i, right);
j = i - 1;
i++;
for (k = left; k < p; k++, j--) { SI_exchange8(elems, k, j); }
for (k = right - 1; k > q; k--, i++) { SI_exchange8(elems, i, k); }
// Recurse.
S_qsort8(elems, left, j, compare, context); // Sort less_than.
S_qsort8(elems, i, right, compare, context); // Sort greater_than.
}