Alan Modra | 250d07d | 2021-01-01 09:28:58 +1030 | [diff] [blame] | 1 | /* Copyright (C) 1991-2021 Free Software Foundation, Inc. |
Jose E. Marchesi | a0486ba | 2019-05-31 11:10:51 +0200 | [diff] [blame] | 2 | This file is part of libctf (imported from Gnulib). |
| 3 | Written by Douglas C. Schmidt (schmidt@ics.uci.edu). |
| 4 | |
| 5 | The GNU C Library is free software; you can redistribute it and/or |
| 6 | modify it under the terms of the GNU Lesser General Public |
| 7 | License as published by the Free Software Foundation; either |
| 8 | version 2.1 of the License, or (at your option) any later version. |
| 9 | |
| 10 | The GNU C Library is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | Lesser General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU Lesser General Public |
| 16 | License along with the GNU C Library; if not, see |
| 17 | <https://www.gnu.org/licenses/>. */ |
| 18 | |
| 19 | /* If you consider tuning this algorithm, you should consult first: |
| 20 | Engineering a sort function; Jon Bentley and M. Douglas McIlroy; |
| 21 | Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */ |
| 22 | |
| 23 | #ifndef _LIBC |
| 24 | # include <config.h> |
| 25 | #endif |
| 26 | |
| 27 | #include <limits.h> |
| 28 | #include <stdlib.h> |
| 29 | #include <string.h> |
| 30 | #include "ctf-decls.h" |
| 31 | |
#ifndef _LIBC
/* Outside of glibc this file provides the function under the name
   ctf_qsort_r, and the comparison callback type is declared here.  */
# define _quicksort ctf_qsort_r
# define __compar_d_fn_t compar_d_fn_t
typedef int (*compar_d_fn_t) (const void *, const void *, void *);
#endif

/* Byte-wise swap two items of size SIZE.  Each argument is evaluated
   exactly once.  A SIZE of zero swaps nothing.  */
#define SWAP(a, b, size)                                                      \
  do                                                                          \
    {                                                                         \
      size_t __size = (size);                                                 \
      char *__a = (a), *__b = (b);                                            \
      while (__size-- > 0)                                                    \
        {                                                                     \
          char __tmp = *__a;                                                  \
          *__a++ = *__b;                                                      \
          *__b++ = __tmp;                                                     \
        }                                                                     \
    } while (0)

/* Discontinue quicksort algorithm when partition gets below this size.
   This particular magic number was chosen to work best on a Sun 4/260.  */
#define MAX_THRESH 4

/* Stack node declarations used to store unfulfilled partition obligations.
   LO and HI point at the first and the last element of a partition.  */
typedef struct
  {
    char *lo;
    char *hi;
  } stack_node;

/* The next 4 #defines implement a very fast in-line stack abstraction.
   PUSH, POP and STACK_NOT_EMPTY operate on the local variables `stack'
   and `top' of the function that uses them.  */
/* The stack needs log (total_elements) entries (we could even subtract
   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
   upper bound for log (total_elements):
   bits per byte (CHAR_BIT) * sizeof(size_t).  */
#define STACK_SIZE      (CHAR_BIT * sizeof(size_t))
#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
#define POP(low, high)  ((void) (--top, (low = top->lo), (high = top->hi)))
#define STACK_NOT_EMPTY (stack < top)


/* Sort the TOTAL_ELEMS elements of SIZE bytes each that start at PBASE,
   in place, using quicksort followed by one insertion sort pass.

   CMP is called as CMP (A, B, ARG) with pointers to two elements and the
   caller-supplied ARG; a negative result means A has to be placed before
   B.  Only the sign test `< 0' is ever used.

   Nothing is done when there are fewer than two elements or when SIZE
   is zero.

   This implementation incorporates four optimizations discussed in
   Sedgewick:

   1. Non-recursive, using an explicit stack of pointers that stores the
      next array partition to sort.  To save time, the maximum amount
      of space required to store an array of SIZE_MAX is allocated on the
      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
      Pretty cheap, actually.

   2. Choose the pivot element using a median-of-three decision tree.
      This reduces the probability of selecting a bad pivot value and
      eliminates certain extraneous comparisons.

   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
      insertion sort to order the MAX_THRESH items within each partition.
      This is a big win, since insertion sort is faster for small, mostly
      sorted array segments.

   4. The larger of the two sub-partitions is always pushed onto the
      stack first, with the algorithm then concentrating on the
      smaller partition.  This *guarantees* no more than log (total_elems)
      stack size is needed (actually O(1) in this case)!  */

void
_quicksort (void *const pbase, size_t total_elems, size_t size,
            __compar_d_fn_t cmp, void *arg)
{
  char *base_ptr = (char *) pbase;

  const size_t max_thresh = MAX_THRESH * size;

  if (total_elems <= 1 || size == 0)
    /* Nothing to order.  Returning here also avoids lossage with the
       unsigned arithmetic below, a division by SIZE == 0 when computing
       the middle element, loops that would never advance with a zero
       step, and forming pointers beyond the end of a one-element array.  */
    return;

  if (total_elems > MAX_THRESH)
    {
      char *lo = base_ptr;
      char *hi = &lo[size * (total_elems - 1)];
      stack_node stack[STACK_SIZE];
      stack_node *top = stack;

      /* Sentinel entry: popping it empties the stack and ends the loop.  */
      PUSH (NULL, NULL);

      while (STACK_NOT_EMPTY)
        {
          char *left_ptr;
          char *right_ptr;

          /* Select median value from among LO, MID, and HI.  Rearrange
             LO and HI so the three values are sorted.  This lowers the
             probability of picking a pathological pivot value and
             skips a comparison for both the LEFT_PTR and RIGHT_PTR in
             the while loops.  */

          char *mid = lo + size * ((hi - lo) / size >> 1);

          if ((*cmp) (mid, lo, arg) < 0)
            SWAP (mid, lo, size);
          if ((*cmp) (hi, mid, arg) < 0)
            {
              SWAP (mid, hi, size);
              /* MID changed, so its order relative to LO has to be
                 re-established.  */
              if ((*cmp) (mid, lo, arg) < 0)
                SWAP (mid, lo, size);
            }

          left_ptr = lo + size;
          right_ptr = hi - size;

          /* Here's the famous ``collapse the walls'' section of quicksort.
             Gotta like those tight inner loops!  They are the main reason
             that this algorithm runs much faster than others.  */
          do
            {
              while ((*cmp) (left_ptr, mid, arg) < 0)
                left_ptr += size;

              while ((*cmp) (mid, right_ptr, arg) < 0)
                right_ptr -= size;

              if (left_ptr < right_ptr)
                {
                  SWAP (left_ptr, right_ptr, size);
                  /* Keep MID pointing at the pivot value if the swap
                     just moved it.  */
                  if (mid == left_ptr)
                    mid = right_ptr;
                  else if (mid == right_ptr)
                    mid = left_ptr;
                  left_ptr += size;
                  right_ptr -= size;
                }
              else if (left_ptr == right_ptr)
                {
                  left_ptr += size;
                  right_ptr -= size;
                  break;
                }
            }
          while (left_ptr <= right_ptr);

          /* Set up pointers for next iteration.  First determine whether
             left and right partitions are below the threshold size.  If so,
             ignore one or both.  Otherwise, push the larger partition's
             bounds on the stack and continue sorting the smaller one.  */

          if ((size_t) (right_ptr - lo) <= max_thresh)
            {
              if ((size_t) (hi - left_ptr) <= max_thresh)
                /* Ignore both small partitions.  */
                POP (lo, hi);
              else
                /* Ignore small left partition.  */
                lo = left_ptr;
            }
          else if ((size_t) (hi - left_ptr) <= max_thresh)
            /* Ignore small right partition.  */
            hi = right_ptr;
          else if ((right_ptr - lo) > (hi - left_ptr))
            {
              /* Push larger left partition indices.  */
              PUSH (lo, right_ptr);
              lo = left_ptr;
            }
          else
            {
              /* Push larger right partition indices.  */
              PUSH (left_ptr, hi);
              hi = right_ptr;
            }
        }
    }

  /* Once the BASE_PTR array is partially sorted by quicksort the rest
     is completely sorted using insertion sort, since this is efficient
     for partitions below MAX_THRESH size.  BASE_PTR points to the beginning
     of the array to sort, and END_PTR points at the very last element in
     the array (*not* one beyond it!).  */

  {
    char *const end_ptr = &base_ptr[size * (total_elems - 1)];
    char *tmp_ptr = base_ptr;
    /* The smaller of END_PTR and BASE_PTR + MAX_THRESH, selected by
       element count so that no pointer beyond the array is formed.  */
    char *thresh = (total_elems - 1 < MAX_THRESH
                    ? end_ptr : base_ptr + max_thresh);
    char *run_ptr;

    /* Find smallest element in first threshold and place it at the
       array's beginning.  This is the smallest array element,
       and the operation speeds up insertion sort's inner loop.  */

    for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
      if ((*cmp) (run_ptr, tmp_ptr, arg) < 0)
        tmp_ptr = run_ptr;

    if (tmp_ptr != base_ptr)
      SWAP (tmp_ptr, base_ptr, size);

    /* Insertion sort, running from left-hand-side up to right-hand-side.  */

    run_ptr = base_ptr + size;
    while ((run_ptr += size) <= end_ptr)
      {
        /* Walk left to the insertion point.  There is no explicit lower
           bound check: the scan stops at the minimum element placed at
           BASE_PTR above.  */
        tmp_ptr = run_ptr - size;
        while ((*cmp) (run_ptr, tmp_ptr, arg) < 0)
          tmp_ptr -= size;

        tmp_ptr += size;
        if (tmp_ptr != run_ptr)
          {
            char *trav;

            /* Move the element at RUN_PTR down to TMP_PTR one byte
               position at a time: for each byte offset, shift that byte
               of every element in between up by one element.  */
            trav = run_ptr + size;
            while (--trav >= run_ptr)
              {
                char c = *trav;
                char *hi, *lo;

                for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
                  *hi = *lo;
                *hi = c;
              }
          }
      }
  }
}