Not to hijack the thread, but:

Originally Posted by

**rcgldr** It's not much of a radix sort

Here is a real radix sort, which sorts about ten million unsigned 64-bit pseudorandom integers (tested using 64-bit xorshift) with an associated pointer payload in less than two seconds on my machine:

Code:

#include <stdint.h> /* for uint64_t */
#include <string.h> /* for memset() */
/* 64-bit unsigned integer radix sort, with a pointer payload, using 8 8-bit bins.
* Pointers may alias each other, as long as
* src_key != tmp1key
* src_ptr != tmp1key
* tmp1key != tmp2key
* tmp1ptr != tmp2ptr
* tmp2key != tmp3key
* tmp2ptr != tmp3ptr
* dst_key != tmp3key
* dst_ptr != tmp3ptr
* In particular,
* src_key == tmp2key == dst_key
* tmp1key == tmp3key
* src_ptr == tmp2ptr == dst_ptr
* tmp1ptr == tmp3ptr
* is perfectly okay.
*/
static void radix_sort_u64_ptr(uint64_t *const dst_key, void * *const dst_ptr,
uint64_t *const tmp3key, void * *const tmp3ptr,
uint64_t *const tmp2key, void * *const tmp2ptr,
uint64_t *const tmp1key, void * *const tmp1ptr,
const uint64_t *const src_key, void *const *const src_ptr,
const size_t len)
{
size_t count[256];
size_t i, sum;
/* Bits 0..7: tmp1 <- src. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[src_key[i] & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[src_key[i] & 255]++;
tmp1key[offset] = src_key[i];
tmp1ptr[offset] = src_ptr[i];
}
/* Bits 8..15: tmp2 <- tmp1. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp1key[i] >> 8) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp1key[i] >> 8) & 255]++;
tmp2key[offset] = tmp1key[i];
tmp2ptr[offset] = tmp1ptr[i];
}
/* Bits 16..23: tmp1 <- tmp2. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp2key[i] >> 16) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp2key[i] >> 16) & 255]++;
tmp1key[offset] = tmp2key[i];
tmp1ptr[offset] = tmp2ptr[i];
}
/* Bits 24..31: tmp2 <- tmp1. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp1key[i] >> 24) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp1key[i] >> 24) & 255]++;
tmp2key[offset] = tmp1key[i];
tmp2ptr[offset] = tmp1ptr[i];
}
/* Bits 32..39: tmp1 <- tmp2. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp2key[i] >> 32) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp2key[i] >> 32) & 255]++;
tmp1key[offset] = tmp2key[i];
tmp1ptr[offset] = tmp2ptr[i];
}
/* Bits 40..47: tmp2 <- tmp1. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp1key[i] >> 40) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp1key[i] >> 40) & 255]++;
tmp2key[offset] = tmp1key[i];
tmp2ptr[offset] = tmp1ptr[i];
}
/* Bits 48..55: tmp3 <- tmp2. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp2key[i] >> 48) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp2key[i] >> 48) & 255]++;
tmp3key[offset] = tmp2key[i];
tmp3ptr[offset] = tmp2ptr[i];
}
/* Bits 56..63: dst <- tmp3. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp3key[i] >> 56) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp3key[i] >> 56) & 255]++;
dst_key[offset] = tmp3key[i];
dst_ptr[offset] = tmp3ptr[i];
}
}

To sort a dataset of *N* entries, you need one temporary array of *N* entries also, plus a constant kilobyte or so of stack when calling the function.

(If you wish to keep the original data intact, you need two arrays of *N* entries; one for the results, and one for the temporaries.)

The reason the above has more than two pointer arguments is that the same function can be used to sort e.g. *double*s (if it uses IEEE-754 Binary64 representation and the same byte order as integer types), with two extra passes over the data. Signed integers (assuming two's complement format) need a similar adjustment:

Code:

#define SIGN64 ((uint64_t)9223372036854775808.0)
int radix_sort_double_ptr(double *const keys, void **const data, const size_t len)
{
uint64_t *const dst_key = (uint64_t *)keys;
void **const dst_ptr = data;
uint64_t *tmp_key;
void **tmp_ptr;
size_t i;
if (len < 2)
return 0;
if (sizeof (double) != sizeof (uint64_t))
return errno = ENOTSUP;
tmp_key = malloc(len * sizeof (uint64_t));
tmp_ptr = malloc(len * sizeof (void *));
if (!tmp_key || !tmp_ptr) {
free(tmp_ptr);
free(tmp_key);
return errno = ENOMEM;
}
for (i = 0; i < len; i++)
tmp_key[i] = (dst_key[i] & SIGN64) ? ~dst_key[i] : dst_key[i] ^ SIGN64;
radix_sort_u64_ptr(tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr, len);
for (i = 0; i < len; i++)
dst_key[i] = (tmp_key[i] & SIGN64) ? tmp_key[i] ^ SIGN64 : ~tmp_key[i];
free(tmp_ptr);
free(tmp_key);
return 0;
}
int radix_sort_i64_ptr(int64_t *const keys, void **const data, const size_t len)
{
uint64_t *const dst_key = (uint64_t *)keys;
void **const dst_ptr = data;
uint64_t *tmp_key;
void **tmp_ptr;
size_t i;
if (len < 2)
return 0;
tmp_key = malloc(len * sizeof (uint64_t));
tmp_ptr = malloc(len * sizeof (void *));
if (!tmp_key || !tmp_ptr) {
free(tmp_ptr);
free(tmp_key);
return errno = ENOMEM;
}
for (i = 0; i < len; i++)
tmp_key[i] = dst_key[i] + SIGN64;
radix_sort_u64_ptr(tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr, len);
for (i = 0; i < len; i++)
dst_key[i] = tmp_key[i] - SIGN64;
free(tmp_ptr);
free(tmp_key);
return 0;
}