Thread: structure woes equals undesired output

  1. #1
    Registered User
    Join Date
    Sep 2001
    Location
    pacific northwest
    Posts
    37

    Unhappy structure woes equals undesired output

    okay... i'm having a little trouble with the output i'm receiving. in the "context" column (read comment header below) at the end of each line are some control characters (ASCII values 01, 02, and 03).

    you'll really have to compile the code and review the output to understand. and if anyone can add any improvements, that would be great too.

    i've been fumbling with this for two days with no luck. thanks.

    Code:
    /*
     * FREQ.C -- blah blah
     *
     *  given input of "this_is_a_test" freq.c will output to a file
     *
     *	char	freq	context
     *	----	----	-------
     *	_ 	3	a, i, t
     *	a	1	_
     *	e	1	s
     *	i	2	s
     *	h	1	i
     *	s	3	_, t
     *	t	3	e, h
     *
     * the "context" column represents characters that appear to the right
     * of each character processed.
     *
     */
    
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define TABLE_SIZE 256
    
    struct data {
    	unsigned char achar;
    	unsigned char context[TABLE_SIZE-1];
    	unsigned long int count;
    };
    
    struct data symbol[TABLE_SIZE-1];
    
    void init_symbol_array();
    void get_stats(char *);
    void print_stats(char *);
    int main(int, char *[]);
    
    /*
     * Kinda obvious, but zero out array
     */
    void
    init_symbol_array(void)
    {
    	int i, j;
    
    	for (i = 0; i < TABLE_SIZE; i++) {
    		symbol[i].achar = i;
    		symbol[i].count = 0;
    		for (j = 0; j < TABLE_SIZE; j++)
    			symbol[i].context[j] = 0;
    	}
    }
    
    
    /*
     * Get frequency count of each character
     */
    void
    get_stats(char *file)
    {
    	int c, i;
    	unsigned char buf[8192];
    	FILE *fp;
    
    	if((fp = fopen(file, "rb")) == NULL) {
    		(void)fprintf(stderr, "\nerror: %s: Can't open '%s'\n",
    		    strerror(ENOENT), file);
    		exit(EXIT_FAILURE);
    	}
    
    	while ((c = fread(buf, 1, 8192, fp)) > 0) {
    		for (i = 0; i < c; i++) {
    			symbol[(int)buf[i]].count++;
    			symbol[(int)buf[i]].context[(int)buf[i+1]] = (int)buf[i+1];
    		}
    	}
    
    	fclose(fp);
    }
    
    
    /*
     * Print symbol stats to output file
     */
    void
    print_stats(char *file)
    {
    	FILE *fp;
    	int i, j;
    
    	if((fp = fopen(file, "w")) == NULL) {
    		(void)fprintf(stderr, "\nerror: %s: Can't write to '%s'\n",
    		    strerror(ENOENT), file);
    		exit(EXIT_FAILURE);
    	}
    
    	fprintf(fp, "char\tfreq\tcontext\n");
    	fprintf(fp, "----\t----\t-------");
    
    	for (i = 0; i < TABLE_SIZE; i++) {
    		if (symbol[i].count > 0) {
    			fprintf(fp, "\n%c\t%lu\t", symbol[i].achar, symbol[i].count);
    			for (j = 0; j < TABLE_SIZE; j++) {
    				if (symbol[i].context[j] > 0)
    					fprintf(fp, "%c", symbol[i].context[j]);
    			}
    		}
    	}
    
    	fclose(fp);
    }
    
    
    /*
     * This is where it all happens; the main function.
     */
    int
    main(int argc, char *argv[])
    {
    
    	if(argv[2] == NULL)
    		argv[2] = "output.txt";
    
    	init_symbol_array();
    	printf("\nGathering statistics... ");
    	get_stats(argv[1]);
    	printf("done\n");
    	print_stats(argv[2]);
    
    	exit(EXIT_SUCCESS);
    }
    "No, I am not wise, but I am a lover of wisdom." --Pythagoras

  2. #2
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    I see lots of buffer overruns and off-by-one errors

    This seems better
    Code:
    /*
     * FREQ.C -- blah blah
     *
     *  given input of "this_is_a_test" freq.c will output to a file
     *
     *	char	freq	context
     *	----	----	-------
     *	_ 	3	a, i, t
     *	a	1	_
     *	e	1	s
     *	i	2	s
     *	h	1	i
     *	s	3	_, t
     *	t	3	e, h
     *
     * the "context" column represents characters that appear to the right
     * of each character processed.
     *
     */
    
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define TABLE_SIZE 256
    
    struct data {
    	unsigned char achar;
    	unsigned char context[TABLE_SIZE];
    	unsigned long int count;
    };
    
    struct data symbol[TABLE_SIZE];
    
    void init_symbol_array();
    void get_stats(char *);
    void print_stats(char *);
    int main(int, char *[]);
    
    /*
     * Kinda obvious, but zero out array
     */
    void
    init_symbol_array(void)
    {
    	int i, j;
    
    	for (i = 0; i < TABLE_SIZE; i++) {
    		symbol[i].achar = i;
    		symbol[i].count = 0;
    		for (j = 0; j < TABLE_SIZE; j++)
    			symbol[i].context[j] = 0;
    	}
    }
    
    
    /*
     * Get frequency count of each character
     */
    void
    get_stats(char *file)
    {
    	int c, i;
    	unsigned char buf[8192];
    	FILE *fp;
    
    	if((fp = fopen(file, "rb")) == NULL) {
    		(void)fprintf(stderr, "\nerror: %s: Can't open '%s'\n",
    		    strerror(ENOENT), file);
    		exit(EXIT_FAILURE);
    	}
    
    	while ((c = fread(buf, 1, sizeof(buf), fp)) > 0) {
    		for (i = 0; i < c-1; i++) { /* nothing to the right of the last char */
    			symbol[(int)buf[i]].count++;
    			symbol[(int)buf[i]].context[(int)buf[i+1]] = (int)buf[i+1];
    		}
    	}
    
    	fclose(fp);
    }
    
    
    /*
     * Print symbol stats to output file
     */
    void
    print_stats(char *file)
    {
    	FILE *fp;
    	int i, j;
    
    	if((fp = fopen(file, "w")) == NULL) {
    		(void)fprintf(stderr, "\nerror: %s: Can't write to '%s'\n",
    		    strerror(ENOENT), file);    /* use strerror(errno) */
    		exit(EXIT_FAILURE);
    	}
    
    	fprintf(fp, "char\tfreq\tcontext\n");
    	fprintf(fp, "----\t----\t-------\n");
    
    	for (i = 0; i < TABLE_SIZE; i++) {
    		if (symbol[i].count > 0) {
    			fprintf(fp, "%c\t%lu\t", symbol[i].achar, symbol[i].count);
    			for (j = 0; j < TABLE_SIZE; j++) {
    				if (symbol[i].context[j] > 0)
    					fprintf(fp, "%c", symbol[i].context[j]);
    			}
                fprintf(fp,"\n");
    		}
    	}
    
    	fclose(fp);
    }
    
    
    /*
     * This is where it all happens; the main function.
     */
    int
    main(int argc, char *argv[])
    {
    
    	if(argv[2] == NULL)
    		argv[2] = "output.txt";
    
    	init_symbol_array();
    	printf("\nGathering statistics... ");
    	get_stats(argv[1]);
    	printf("done\n");
    	print_stats(argv[2]);
    
    	exit(EXIT_SUCCESS);
    }
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

  3. #3
    Registered User
    Join Date
    Sep 2001
    Location
    pacific northwest
    Posts
    37
    perfect!

    thanks salem. i thought i had tried (c - 1) in the for loop. well, works now.
    "No, I am not wise, but I am a lover of wisdom." --Pythagoras

  4. #4
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    Didn't spot this first time around

    int main(int, char *[]);

    There is no need to prototype main
    It is incorrect to even try IIRC.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Structure Problem
    By Generator in forum C Programming
    Replies: 5
    Last Post: 07-28-2003, 11:54 PM
  2. Need help fixing bugs in data parsing program
    By daluu in forum C Programming
    Replies: 8
    Last Post: 03-27-2003, 06:02 PM
  3. Control different DA output value!
    By Hunterhunter in forum A Brief History of Cprogramming.com
    Replies: 1
    Last Post: 03-13-2003, 12:11 PM
  4. Serial Communications in C
    By ExDigit in forum Windows Programming
    Replies: 7
    Last Post: 01-09-2002, 10:52 AM
  5. Simple File Creation Algorithm
    By muffin in forum C Programming
    Replies: 13
    Last Post: 08-24-2001, 03:28 PM