Thantos:
Okay... but my C skills are a little rusty. I am not getting your code to work. Sure, it compiles, but it segfaults on the fread().
Here's my current code. The fread() is working now, but I can't process files over 10k without segfaults. Should I be using malloc()?
Code:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/*
 * Per-byte-value statistics gathered from the input file.
 */
struct data {
    int count;    /* number of occurrences of this byte value */
    int position; /* running sum of the 1-based positions of its occurrences */
};

/*
 * Statistics table indexed directly by byte value. A byte can take 256
 * distinct values (0..255), so the table needs 256 entries, not 255.
 */
struct data symbol[256];
/*
 * init_symbol_array(): Reset every entry of the global symbol table to zero.
 *
 * The amount cleared is taken from the array itself rather than from a
 * hard-coded element count, so this can never write past the end of the
 * table regardless of how it is declared.
 */
void
init_symbol_array(void)
{
    /* Both members are ints, so all-bits-zero is the value 0 for each. */
    memset(symbol, 0, sizeof symbol);
}
/*
 * my_fopen(): fopen() wrapper that never returns NULL.
 *
 * file: path of the file to open
 * type: fopen() mode string (e.g. "rb", "wb")
 *
 * Returns the open stream. If the file cannot be opened, prints the reason
 * to stderr and terminates the program with EXIT_FAILURE.
 */
FILE *
my_fopen(char *file, char *type)
{
    FILE *fp = fopen(file, type);

    if (fp == NULL) {
        /*
         * Report the actual failure (permission denied, is a directory, ...)
         * instead of always claiming "no such file". Save errno first so a
         * later library call cannot overwrite it.
         */
        int saved_errno = errno;

        (void)fprintf(stderr, "\nerror: %s: Can't open '%s'\n",
            strerror(saved_errno), file);
        exit(EXIT_FAILURE);
    }
    return fp;
}
/*
* symbol_stats(): Get frequency count and position of each character
*/
void
symbol_stats(char *file)
{
FILE *i_file;
clock_t start_cpu, now_cpu; /* timing var */
char buffer[80000];
int c, i, nc;
i_file = my_fopen(file, "rb");
printf("\nGathering statistics... ");
nc = 1;
start_cpu = clock();
while((c = fread(buffer, 1, sizeof(buffer), i_file)) > 0) {
for (i = 0; i < c; i++) {
symbol[(int)buffer[i]].count++;
symbol[(int)buffer[i]].position = symbol[(int)buffer[i]].position + nc;
nc++;
}
}
now_cpu = clock();
printf("%lf seconds\n",(now_cpu-start_cpu)/(double)CLOCKS_PER_SEC);
fclose(i_file);
}
/*
* Print symbol stats to output file
*/
void
print_symbol_stats(char *file)
{
int i;
FILE *o_file;
o_file = my_fopen(file, "wb");
for (i = 0; i < 256; i++) {
if (symbol[i].count > 0) {
fprintf(o_file, "%c\t%d\t%d\n", i,
symbol[i].count, symbol[i].position);
}
}
fclose(o_file);
}
/*
 * The main function; where it all happens.
 *
 * Usage: <program> input-file [output-file]
 *
 * Gathers per-byte statistics from input-file and writes them to
 * output-file, which defaults to "compressed.txt" when omitted.
 * Returns 0 on success, EXIT_FAILURE when no input file is given.
 */
int
main(int argc, char **argv)
{
    char *in_file;
    char *out_file = "compressed.txt";

    /*
     * Check argc before touching argv[1] or argv[2]. With no arguments,
     * argv[1] is the NULL terminator and argv[2] lies past the end of the
     * array.
     */
    if (argc < 2) {
        (void)fprintf(stderr, "usage: %s input-file [output-file]\n",
            argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }
    in_file = argv[1];
    if (argc > 2) {
        out_file = argv[2];
    }

    init_symbol_array();
    symbol_stats(in_file);
    print_symbol_stats(out_file);
    return 0;
}