Not to hijack the thread, but:
Originally Posted by
rcgldr
It's not much of a radix sort
Here is a real radix sort, which sorts about ten million unsigned 64-bit pseudorandom integers (tested using 64-bit xorshift) with an associated pointer payload in less than two seconds on my machine:
Code:
#include <stdint.h> /* for uint64_t */
#include <string.h> /* for memset() */
/* 64-bit unsigned integer radix sort, with a pointer payload, using 8 8-bit bins.
* Pointers may alias each other, as long as
* src_key != tmp1key
* src_ptr != tmp1key
* tmp1key != tmp2key
* tmp1ptr != tmp2ptr
* tmp2key != tmp3key
* tmp2ptr != tmp3ptr
* dst_key != tmp3key
* dst_ptr != tmp3ptr
* In particular,
* src_key == tmp2key == dst_key
* tmp1key == tmp3key
* src_ptr == tmp2ptr == dst_ptr
* tmp1ptr == tmp3ptr
* is perfectly okay.
*/
static void radix_sort_u64_ptr(uint64_t *const dst_key, void * *const dst_ptr,
uint64_t *const tmp3key, void * *const tmp3ptr,
uint64_t *const tmp2key, void * *const tmp2ptr,
uint64_t *const tmp1key, void * *const tmp1ptr,
const uint64_t *const src_key, void *const *const src_ptr,
const size_t len)
{
size_t count[256];
size_t i, sum;
/* Bits 0..7: tmp1 <- src. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[src_key[i] & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[src_key[i] & 255]++;
tmp1key[offset] = src_key[i];
tmp1ptr[offset] = src_ptr[i];
}
/* Bits 8..15: tmp2 <- tmp1. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp1key[i] >> 8) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp1key[i] >> 8) & 255]++;
tmp2key[offset] = tmp1key[i];
tmp2ptr[offset] = tmp1ptr[i];
}
/* Bits 16..23: tmp1 <- tmp2. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp2key[i] >> 16) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp2key[i] >> 16) & 255]++;
tmp1key[offset] = tmp2key[i];
tmp1ptr[offset] = tmp2ptr[i];
}
/* Bits 24..31: tmp2 <- tmp1. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp1key[i] >> 24) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp1key[i] >> 24) & 255]++;
tmp2key[offset] = tmp1key[i];
tmp2ptr[offset] = tmp1ptr[i];
}
/* Bits 32..39: tmp1 <- tmp2. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp2key[i] >> 32) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp2key[i] >> 32) & 255]++;
tmp1key[offset] = tmp2key[i];
tmp1ptr[offset] = tmp2ptr[i];
}
/* Bits 40..47: tmp2 <- tmp1. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp1key[i] >> 40) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp1key[i] >> 40) & 255]++;
tmp2key[offset] = tmp1key[i];
tmp2ptr[offset] = tmp1ptr[i];
}
/* Bits 48..55: tmp3 <- tmp2. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp2key[i] >> 48) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp2key[i] >> 48) & 255]++;
tmp3key[offset] = tmp2key[i];
tmp3ptr[offset] = tmp2ptr[i];
}
/* Bits 56..63: dst <- tmp3. */
memset(count, 0, 256 * sizeof count[0]);
for (i = 0; i < len; i++)
count[(tmp3key[i] >> 56) & 255]++;
for (i = 0, sum = 0; i < 256; i++) {
const size_t start = sum;
sum += count[i];
count[i] = start;
}
for (i = 0; i < len; i++) {
const size_t offset = count[(tmp3key[i] >> 56) & 255]++;
dst_key[offset] = tmp3key[i];
dst_ptr[offset] = tmp3ptr[i];
}
}
To sort a dataset of N entries, you need one temporary array of N entries also, plus a constant kilobyte or so of stack when calling the function.
(If you wish to keep the original data intact, you need two arrays of N entries; one for the results, and one for the temporaries.)
The reason the above has more than two pointer arguments is that the same function can be used to sort e.g. doubles (if it uses IEEE-754 Binary64 representation and the same byte order as integer types), with two extra passes over the data. Signed integers (assuming two's complement format) need a similar adjustment:
Code:
#define SIGN64 ((uint64_t)9223372036854775808.0)
int radix_sort_double_ptr(double *const keys, void **const data, const size_t len)
{
uint64_t *const dst_key = (uint64_t *)keys;
void **const dst_ptr = data;
uint64_t *tmp_key;
void **tmp_ptr;
size_t i;
if (len < 2)
return 0;
if (sizeof (double) != sizeof (uint64_t))
return errno = ENOTSUP;
tmp_key = malloc(len * sizeof (uint64_t));
tmp_ptr = malloc(len * sizeof (void *));
if (!tmp_key || !tmp_ptr) {
free(tmp_ptr);
free(tmp_key);
return errno = ENOMEM;
}
for (i = 0; i < len; i++)
tmp_key[i] = (dst_key[i] & SIGN64) ? ~dst_key[i] : dst_key[i] ^ SIGN64;
radix_sort_u64_ptr(tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr, len);
for (i = 0; i < len; i++)
dst_key[i] = (tmp_key[i] & SIGN64) ? tmp_key[i] ^ SIGN64 : ~tmp_key[i];
free(tmp_ptr);
free(tmp_key);
return 0;
}
int radix_sort_i64_ptr(int64_t *const keys, void **const data, const size_t len)
{
uint64_t *const dst_key = (uint64_t *)keys;
void **const dst_ptr = data;
uint64_t *tmp_key;
void **tmp_ptr;
size_t i;
if (len < 2)
return 0;
tmp_key = malloc(len * sizeof (uint64_t));
tmp_ptr = malloc(len * sizeof (void *));
if (!tmp_key || !tmp_ptr) {
free(tmp_ptr);
free(tmp_key);
return errno = ENOMEM;
}
for (i = 0; i < len; i++)
tmp_key[i] = dst_key[i] + SIGN64;
radix_sort_u64_ptr(tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr,
dst_key, tmp_ptr,
tmp_key, dst_ptr, len);
for (i = 0; i < len; i++)
dst_key[i] = tmp_key[i] - SIGN64;
free(tmp_ptr);
free(tmp_key);
return 0;
}