Code:
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <limits.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <stdio.h>
/* One line of input together with the sort key parsed from it.
 * All pointers refer into the buffer the line was found in; a line_t
 * owns no memory of its own.
 */
typedef struct {
const char *ptr; /* First character of the line */
size_t len; /* Length of the line in bytes, line terminator included */
/* Sort key, filled in by define_line(): numeric value first, then text. */
long keyval; /* Signed value of the leading numeric field */
const char *keyptr; /* Text after the numeric field and the whitespace following it */
size_t keylen; /* Number of bytes from keyptr to the end of the line */
} line_t;
/* An input file as set up by open_input() and released by close_input():
 * the open descriptor, the read-only memory map, and the line index.
 */
typedef struct {
int map_desc; /* File descriptor; close_input() treats -1 as "not open" */
size_t map_size; /* Map size: the length passed to mmap() and munmap() */
size_t map_used; /* File size: number of bytes of the map that hold file data */
const char *map_data; /* Memory-mapped data, or MAP_FAILED when nothing is mapped */
size_t lines; /* Number of lines */
line_t *line; /* Array of lines, includes newlines; released with free() */
} file_t;
/* Compare two lines.
* This sorts by keyval, then case insensitively at keyptr.
*/
static int compare_lines(const void *left, const void *right)
{
const long val1 = ((const line_t *const)left)->keyval;
const char *key1 = ((const line_t *const)left)->keyptr;
const size_t len1 = ((const line_t *const)left)->keylen;
const long val2 = ((const line_t *const)right)->keyval;
const char *key2 = ((const line_t *const)right)->keyptr;
const size_t len2 = ((const line_t *const)right)->keylen;
if (val1 < val2)
return -1;
else
if (val1 > val2)
return +1;
if (len1 < len2)
return (strncasecmp(key1, key2, len1) <= 0) ? -1 : +1;
else
if (len1 > len2)
return (strncasecmp(key1, key2, len2) < 0) ? -1 : +1;
else
return strncasecmp(key1, key2, len1);
}
/* Define a line.
 *
 * Records the extent of the line [ptr, end) in *line and parses its sort key.
 * The expected layout is:
 *
 *     [tabs/spaces] [any number of '+' or '-'] [decimal digits] [tabs/spaces] key-text
 *
 * Each '-' flips the sign. A line without digits gets the numeric key 0.
 * The key text is everything that remains up to end, so it includes the
 * line terminator when the caller's range includes it.
 *
 * The numeric key saturates at +/-LONG_MAX: a digit run too long for a long
 * is clamped instead of overflowing (signed overflow is undefined behaviour).
 *
 * line: entry to fill in; must not be NULL.
 * ptr:  first character of the line.
 * end:  one past the last character of the line.
 */
static void define_line(line_t *const line, const char *ptr, const char *const end)
{
    const unsigned long limit = (unsigned long)LONG_MAX;
    unsigned long magnitude = 0UL;
    int negative = 0;

    line->ptr = ptr;
    line->len = (size_t)(end - ptr);

    /* Skip leading whitespace. */
    while (ptr < end && (*ptr == '\t' || *ptr == ' '))
        ptr++;

    /* Parse signs. */
    while (ptr < end && (*ptr == '+' || *ptr == '-')) {
        if (*ptr == '-')
            negative = !negative;
        ptr++;
    }

    /* Parse numeric value. All digits are consumed even after the value
     * has saturated, so the key text starts at the same place either way. */
    while (ptr < end && *ptr >= '0' && *ptr <= '9') {
        const unsigned long digit = (unsigned long)(*ptr - '0');

        if (magnitude > (limit - digit) / 10UL)
            magnitude = limit;
        else
            magnitude = 10UL * magnitude + digit;
        ptr++;
    }

    /* Skip whitespace. */
    while (ptr < end && (*ptr == '\t' || *ptr == ' '))
        ptr++;

    /* The second token is the key. magnitude never exceeds LONG_MAX,
     * so both conversions below are exact. */
    line->keyval = negative ? -(long)magnitude : (long)magnitude;
    line->keyptr = ptr;
    line->keylen = (size_t)(end - ptr);
}
/* Split a buffer into lines and build an array describing them.
 *
 * A line ends at "\n", "\r", "\n\r" or "\r\n", or at the end of the data;
 * the terminator is counted as part of the line. Each line is parsed with
 * define_line().
 *
 * data, size: the buffer to split.
 * arrayptr:   receives the dynamically allocated array (NULL when the
 *             buffer holds no lines); the caller releases it with free().
 * countptr:   receives the number of lines.
 *
 * Returns 0 on success. On failure returns EINVAL (NULL output pointer) or
 * ENOMEM, with errno set to the same value; the outputs are not modified.
 */
static int get_lines(const char *const data,
                     const size_t size,
                     line_t **const arrayptr,
                     size_t *const countptr)
{
    const char *const stop = data + size;
    const char *head = data;
    line_t *array = NULL;
    size_t used = 0;
    size_t room = 0;

    if (arrayptr == NULL || countptr == NULL) {
        errno = EINVAL;
        return EINVAL;
    }

    while (head < stop) {
        const char *tail = head;

        /* Scan up to the first newline character or the end of the data. */
        while (tail < stop && *tail != '\n' && *tail != '\r')
            tail++;

        /* Consume the terminator: one newline character, plus the other
         * kind of newline character if it follows immediately. */
        if (tail < stop) {
            const char partner = (*tail == '\n') ? '\r' : '\n';

            tail++;
            if (tail < stop && *tail == partner)
                tail++;
        }

        /* Grow the array in large steps to keep reallocations rare. */
        if (used >= room) {
            line_t *grown;

            room = (used | 65535) + 65537;
            grown = realloc(array, room * sizeof *array);
            if (grown == NULL) {
                free(array);
                errno = ENOMEM;
                return ENOMEM;
            }
            array = grown;
        }

        define_line(array + used, head, tail);
        used++;
        head = tail;
    }

    if (used == 0) {
        /* No lines at all: hand back NULL rather than an empty allocation. */
        free(array);
        array = NULL;
    } else if (used != room) {
        /* Trim the unused tail; if shrinking fails the larger array is kept. */
        line_t *const trimmed = realloc(array, used * sizeof *array);

        if (trimmed != NULL)
            array = trimmed;
    }

    *arrayptr = array;
    *countptr = used;
    return 0;
}
/* Open an input file, map it read-only into memory, and index its lines.
 *
 * file: structure to fill in; must not be NULL.
 * name: path of the file to open; must be a non-empty string.
 *
 * Returns 0 on success. On failure returns a nonzero errno code, also
 * stored in errno, and leaves *file in the "closed" state (map_desc == -1,
 * map_data == MAP_FAILED, no lines), so that calling close_input() on it
 * is harmless.
 */
int open_input(file_t *const file, const char *const name)
{
    struct stat info;
    const char *data;
    size_t length;
    long pagesize;
    int fd, result;

    if (!file || !name || !*name)
        return errno = EINVAL;

    /* Start from the closed state. map_desc must be set here as well:
     * close_input() closes any descriptor that is not -1. */
    file->map_desc = -1;
    file->map_data = MAP_FAILED;
    file->map_used = 0;
    file->map_size = 0;
    file->lines = 0;
    file->line = NULL;

    pagesize = sysconf(_SC_PAGE_SIZE);
    if (pagesize < 1L)
        return errno = ENOTSUP;

    do {
        fd = open(name, O_RDONLY | O_NOCTTY);
    } while (fd == -1 && errno == EINTR);
    if (fd == -1)
        return errno;

    if (fstat(fd, &info)) {
        /* Save the reason fstat() failed; close() may overwrite errno. */
        const int cause = errno;

        do {
            result = close(fd);
        } while (result == -1 && errno == EINTR);
        return errno = cause;
    }

    /* Length must be a positive multiple of page size. */
    if (!info.st_size || (long)info.st_size % pagesize)
        length = (size_t)info.st_size + (size_t)pagesize - (size_t)((long)info.st_size % pagesize);
    else
        length = (size_t)info.st_size;

    data = mmap(NULL, length, PROT_READ, MAP_SHARED | MAP_NORESERVE, fd, 0);
    if (data == MAP_FAILED) {
        const int cause = errno;

        do {
            result = close(fd);
        } while (result == -1 && errno == EINTR);
        return errno = cause;
    }

    /* We will be accessing the file linearly. The advice calls are
     * hints only, so their results are deliberately ignored. */
    posix_fadvise(fd, 0, info.st_size, POSIX_FADV_SEQUENTIAL);
    posix_madvise((void *)data, length, POSIX_MADV_SEQUENTIAL);

    if (get_lines(data, (size_t)info.st_size, &file->line, &file->lines)) {
        munmap((void *)data, length);
        do {
            result = close(fd);
        } while (result == -1 && errno == EINTR);
        return errno = ENOMEM;
    }

    /* From this point forwards, the file and map access will be random. */
    posix_fadvise(fd, 0, info.st_size, POSIX_FADV_RANDOM);
    posix_madvise((void *)data, length, POSIX_MADV_RANDOM);

    file->map_desc = fd;
    file->map_data = data;
    file->map_size = length;
    file->map_used = (size_t)info.st_size;
    return 0;
}
/* Release everything held by an input file: the memory map, the file
 * descriptor and the line array. The structure is then reset to the
 * closed state. A NULL file is ignored.
 */
void close_input(file_t *const file)
{
    int status;

    if (file == NULL)
        return;

    /* Unmap only when a mapping of nonzero size is actually recorded. */
    if (file->map_size != 0 && file->map_data != MAP_FAILED)
        munmap((void *)file->map_data, file->map_size);

    /* Close the descriptor, retrying if a signal interrupts the call. */
    if (file->map_desc != -1) {
        do {
            status = close(file->map_desc);
        } while (status == -1 && errno == EINTR);
    }

    free(file->line);

    file->line = NULL;
    file->lines = 0;
    file->map_used = 0;
    file->map_size = 0;
    file->map_data = MAP_FAILED;
    file->map_desc = -1;
}
/* Open the output file for reading and writing.
 *
 * create: -1: Never, file must exist
 *          0: Create if necessary
 *         +1: Always create (the open fails if the file already exists)
 *
 * Returns the new file descriptor, or -1 with errno set. A NULL or empty
 * filename yields -1 with errno set to EINVAL.
 */
int open_output(const char *const filename, const int create)
{
    int flags;
    int fd;

    if (filename == NULL || *filename == '\0') {
        errno = EINVAL;
        return -1;
    }

    if (create < 0)
        flags = O_RDWR | O_NOCTTY;
    else if (create > 0)
        flags = O_RDWR | O_CREAT | O_EXCL;
    else
        flags = O_RDWR | O_CREAT | O_NOCTTY;

    /* Retry while interrupted by a signal. The mode argument is only
     * consulted when O_CREAT is among the flags. */
    for (;;) {
        fd = open(filename, flags, 0666);
        if (fd != -1 || errno != EINTR)
            return fd;
    }
}
/* Close an output descriptor, retrying if the call is interrupted by a
 * signal.
 *
 * Returns 0 on success, or when fd is -1 (nothing to close); otherwise
 * returns the errno value left by the failed close().
 */
int close_output(const int fd)
{
    int status;

    if (fd == -1)
        return 0;

    for (;;) {
        status = close(fd);
        if (status != -1)
            return 0;
        if (errno != EINTR)
            return errno;
    }
}
/* Write every line of file to the descriptor to_fd, in array order.
 *
 * The descriptor is first rewound, resized to the input size and
 * preallocated. Those three steps are best effort, because to_fd may be
 * something other than a regular file, so their failures are ignored.
 *
 * Returns 0 on success, otherwise a nonzero errno code (also stored in
 * errno): EINVAL for a NULL file or a to_fd of -1, EIO if write() reports
 * an impossible count, or the error reported by write().
 */
int write_all(const file_t *const file, const int to_fd)
{
    size_t idx;

    if (file == NULL || to_fd == -1) {
        errno = EINVAL;
        return EINVAL;
    }

    /* Reset file position. Ignore errors (in case to_fd is not a file). */
    (void)lseek(to_fd, 0, SEEK_SET);

    /* Resize file to final size. Ignore errors. */
    if (ftruncate(to_fd, (off_t)file->map_used)) {
        /* Ignore errors */
    }

    /* Preallocate file. Ignore errors. */
    (void)posix_fallocate(to_fd, (off_t)0, (off_t)file->map_used);

    /* Write the lines. */
    for (idx = 0; idx < file->lines; idx++) {
        const char *cursor = file->line[idx].ptr;
        const char *const limit = cursor + file->line[idx].len;

        while (cursor < limit) {
            const ssize_t wrote = write(to_fd, cursor, (size_t)(limit - cursor));

            if (wrote > (ssize_t)0) {
                /* Partial writes are fine; continue after what was written. */
                cursor += wrote;
                continue;
            }
            if (wrote != (ssize_t)-1) {
                /* Neither progress nor an error. */
                errno = EIO;
                return EIO;
            }
            if (errno != EINTR)
                return errno;
            /* Interrupted by a signal: try again. */
        }

        if (cursor != limit) {
            /* Should never happen. */
            errno = EIO;
            return EIO;
        }
    }

    /* Success. */
    return 0;
}
/* Sort the lines of INPUT-FILE and write them to OUTPUT-FILE, or to
 * standard output when no output file is given.
 *
 * Usage: PROGRAM [ -h | --help ]
 *        PROGRAM INPUT-FILE [ OUTPUT-FILE ]
 *
 * OUTPUT-FILE must not already exist; it is removed again if anything
 * fails after it has been created. Progress and errors go to standard
 * error. Returns 0 on success and 1 on a usage error or any failure.
 */
int main(int argc, char *argv[])
{
    file_t input;
    const char *outname;
    int output, result;

    if (argc < 2 || argc > 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, " %s INPUT-FILE [ OUTPUT-FILE ]\n", argv[0]);
        fprintf(stderr, "\n");
        return 1;
    }

    if (argc >= 3) {
        outname = argv[2];
        output = open_output(outname, +1); /* Always create */
        if (output == -1) {
            /* The failure concerns the output file, so name that file. */
            fprintf(stderr, "%s: %s.\n", outname, strerror(errno));
            return 1;
        }
    } else {
        output = STDOUT_FILENO;
        outname = NULL;
    }

    if (open_input(&input, argv[1])) {
        fprintf(stderr, "%s: %s.\n", argv[1], strerror(errno));
        if (outname) {
            /* Do not leave an empty output file behind. */
            unlink(outname);
            do {
                result = close(output);
            } while (result == -1 && errno == EINTR);
        }
        return 1;
    }

    fprintf(stderr, "Read %lu lines.\nSorting .. ", (unsigned long)input.lines);
    fflush(stderr);

    qsort(input.line, input.lines, sizeof input.line[0], compare_lines);

    fprintf(stderr, "Done.\nSaving .. ");
    fflush(stderr);

    if (write_all(&input, output)) {
        if (outname) {
            /* Report first: unlink() and close() may overwrite errno. */
            fprintf(stderr, "%s: %s.\n", outname, strerror(errno));
            unlink(outname);
            do {
                result = close(output);
            } while (result == -1 && errno == EINTR);
        } else
            fprintf(stderr, "Error writing to standard output: %s.\n", strerror(errno));
        return 1;
    }

    /* A failing close can be the first sign of a delayed write error. */
    if (close_output(output)) {
        if (outname) {
            fprintf(stderr, "%s: Write error: %s.\n", outname, strerror(errno));
            unlink(outname);
        } else
            fprintf(stderr, "Error writing to standard output: %s.\n", strerror(errno));
        return 1;
    }

    fprintf(stderr, "Done.\n");
    close_input(&input);
    return 0;
}
The above code defines the numeric value of the initial field in