Code:
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/time.h>
#ifdef DEBUG
# include <inttypes.h>
# include "cycle_counting.h"
#endif
#define FILENAME "t8.shakespeare.txt"
// Ideal buffer size?!
// I got the best results for disk I/O with this size.
#define BUFFER_SIZE 16384
// is_vowel() and is_consonant() assume ASCII.
// Technically this is not "portable", since we could have been dealing
// with EBCDIC, for example!
// But I think it is "portable" enough.
//
// Also, keep 'int' (the natural word size) for the tables and return
// values: using _Bool will slow things down.
static int is_vowel( unsigned char c )
{
  // 256-entry lookup table indexed directly by the byte value.
  // Only the ten ASCII vowel codes are set; every other entry,
  // including all bytes >= 0x80 (e.g. the bytes of multi-byte UTF-8
  // sequences), is implicitly zero.
  static const int vowel_lut[256] =
  {
    [0x41] = 1, [0x45] = 1, [0x49] = 1, [0x4F] = 1, [0x55] = 1,  // A E I O U
    [0x61] = 1, [0x65] = 1, [0x69] = 1, [0x6F] = 1, [0x75] = 1   // a e i o u
  };

  return vowel_lut[c];
}
static int is_consonant( unsigned char c )
{
  // 256-entry lookup table indexed directly by the byte value.
  // Only the ASCII codes of the 21 non-vowel letters (upper and lower
  // case, 'Y'/'y' included) are set; every other entry, including all
  // bytes >= 0x80 (e.g. the bytes of multi-byte UTF-8 sequences), is
  // implicitly zero.
  static const int consonant_lut[256] =
  {
    // B C D   F G H   J K L M N
    [0x42] = 1, [0x43] = 1, [0x44] = 1,
    [0x46] = 1, [0x47] = 1, [0x48] = 1,
    [0x4A] = 1, [0x4B] = 1, [0x4C] = 1, [0x4D] = 1, [0x4E] = 1,
    // P Q R S T   V W X Y Z
    [0x50] = 1, [0x51] = 1, [0x52] = 1, [0x53] = 1, [0x54] = 1,
    [0x56] = 1, [0x57] = 1, [0x58] = 1, [0x59] = 1, [0x5A] = 1,
    // b c d   f g h   j k l m n
    [0x62] = 1, [0x63] = 1, [0x64] = 1,
    [0x66] = 1, [0x67] = 1, [0x68] = 1,
    [0x6A] = 1, [0x6B] = 1, [0x6C] = 1, [0x6D] = 1, [0x6E] = 1,
    // p q r s t   v w x y z
    [0x70] = 1, [0x71] = 1, [0x72] = 1, [0x73] = 1, [0x74] = 1,
    [0x76] = 1, [0x77] = 1, [0x78] = 1, [0x79] = 1, [0x7A] = 1
  };

  return consonant_lut[c];
}
// Counts the consonants and vowels (as classified by is_consonant() and
// is_vowel()) in FILENAME, then prints both totals and the elapsed
// wall-clock time in microseconds.
//
// Returns EXIT_SUCCESS when the whole file was read, or EXIT_FAILURE if
// the file cannot be opened or a read() call fails.
int main( void )
{
  static char buffer[BUFFER_SIZE];
  char *p;
  int fd;
  ssize_t size;
  unsigned long int v, c; // 'long int' is 64 bits long on x86-64.
                          // I could use 'unsigned int' to speed things up, but
                          // I don't know if the counts would overflow.
  struct timeval start, end;
  long int elapsed_us;

  gettimeofday( &start, NULL );

#ifdef DEBUG
  counter_T c1;

  c1 = BEGIN_TSC();
#endif

  // NOTE: Using file descriptors and syscalls
  //       because FILE streams are too slow.
  if ( ( fd = open( FILENAME, O_RDONLY ) ) < 0 )
  {
    fputs( "ERROR opening file.\n", stderr );
    return EXIT_FAILURE;
  }

  v = c = 0;
  while ( ( size = read( fd, buffer, BUFFER_SIZE ) ) > 0 )
  {
    p = buffer;
    while ( size-- )
    {
      // NOTE: 'else' is necessary not to do the test twice!
      //       And since we have more consonants than vowels (I think),
      //       the order of these tests is important.
      if ( is_consonant( *p ) )
        c++;
      else if ( is_vowel( *p ) )
        v++;

      p++;
    }
  }

  // The loop ends on both end-of-file (0) and error (-1). Do not report
  // partial counts as a successful result when the read failed.
  if ( size < 0 )
  {
    fputs( "ERROR reading file.\n", stderr );
    close( fd );
    return EXIT_FAILURE;
  }

  close( fd );

#ifdef DEBUG
  END_TSC( &c1 );
  printf( "Cycles: %" PRIu64 ".\n", c1 );
#endif

  gettimeofday( &end, NULL );

  // Compute into a 'long' so the argument type matches "%ld" exactly.
  elapsed_us = ( long int )( end.tv_sec - start.tv_sec ) * 1000000 +
               ( long int )( end.tv_usec - start.tv_usec );

  printf( "There are %lu consonants and %lu vowels in the Complete Works of William Shakespeare.\n", c, v );
  printf( "Time: %ld μs.\n", elapsed_us );

  return EXIT_SUCCESS;
}
cycle_counting.h is this one: