Thread: made a little file-counting and byte-tallying program ...

  1. #1
    Registered User geekoftheweek's Avatar
    Join Date
    Mar 2003
    Location
    maine
    Posts
    8

    made a little file-counting and byte-tallying program ...

    Hi.
    Made this utility to count all files recursively through directories and give a total of bytes used and would like any criticism as to how it could be more efficient, etc. Also, sometimes the counts are off on some directories compared to the count the OS (Linux in this case) gives me, and I was wondering if maybe the stat() part was counting file types wrong.

    Code:
    /* flcnt.c */
    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include <sys/types.h>
    #include <sys/stat.h>
    #include <dirent.h>
    #include <unistd.h>
    
    /* Value do_tally() returns when the entry it examined is a directory. */
    #define VALIDIR   1
    /* dtype values for do_open(): TOP_DIR is a directory named on the command
     * line, SUB_DIR is one discovered while walking a directory.
     */
    #define TOP_DIR 0
    #define SUB_DIR 1
    
    /* Walk directory dname; returns 1 if it cannot be opened, 0 otherwise. */
    int do_open(char *dname, int dtype);
    /* Tally one entry; returns -1 if stat() fails, VALIDIR for a directory,
     * 0 for anything else (including "." and "..", which are skipped).
     */
    int do_tally(char *fname);
    /* Store "<dname>/" in curpath and append it to the global fullpath. */
    void updt_path_str(char *curpath, char *dname);
    
    /* Per-argument counters: main() adds them to the totals and resets them
     * after each command-line directory has been processed.
     */
    unsigned int filecount = 0;
    unsigned int dircount  = 0;
    /* Running totals across every directory given on the command line. */
    unsigned int totalfiles = 0;
    unsigned int totaldirs = 0;
    /* Set to 1 by the -v option; enables the "Reading directory" messages. */
    int verbose   = 0;
    /* Path of the directory currently being read, built up by
     * updt_path_str() and printed in verbose mode.
     */
    char fullpath[PATH_MAX];
    
    /* we need long long to support totals over 4gigs */
    unsigned long long int totalbytes = 0;	
    
    /*
     * Entry point: flcnt [-v] directory ...
     *
     * Walks every directory named on the command line, prints a per-directory
     * file/sub-directory count, then prints the grand totals.  -v turns on
     * progress messages.  Exits with status 1 when no directory is given.
     */
    int main(int argc, char *argv[])
    {
    	int c;

    	/* get opts */
    	while ((c = getopt(argc, argv, "v")) != -1) {
    		switch (c) {
    		case 'v':
    			verbose = 1;
    			break;
    		default:
    			break;
    		}
    	}

    	if (!(optind < argc)) {
    		fprintf(stderr, "Usage flcnt [-v] directory ...\n");
    		exit(1);
    	}

    	/* process each directory given as argument */
    	while (optind < argc) {
    		printf("processing directory %s, please wait ...\n\n",
    			argv[optind]);
    		if ((do_open(argv[optind], TOP_DIR)) == 1) {
    			fprintf(stderr, "Can't open %s: %s\n",
    				argv[optind], strerror(errno));
    		} else {
    			/* the counters are unsigned, so they must be printed
    			 * with %u; %d is a format/argument mismatch
    			 */
    			printf("%s contains %u files and %u sub-directories\n",
    				argv[optind], filecount, dircount);
    			totalfiles = totalfiles + filecount;
    			totaldirs  = totaldirs  + dircount;
    			/* start the next argument from a clean slate */
    			filecount = dircount = 0;
    		}
    		++optind;
    	}

    	printf("\ntotal size in bytes of directories and files is %llu\n",
    		totalbytes);
    	printf("total files are %u, and total sub-directories are %u\n",
    		totalfiles, totaldirs);

    	return 0;
    }
    
    /*
     * do_open - tally directory dname and, recursively, everything below it.
     *
     * dname: directory to walk, resolved against the current working directory.
     * dtype: SUB_DIR for a directory found during the walk (it is counted in
     *        dircount); any other value (TOP_DIR) for a command-line argument,
     *        whose own size is added to totalbytes here because no parent
     *        listing tallied it.
     *
     * Returns 0 on success.  Returns 1, with errno describing the failure, when
     * the directory cannot be opened, examined or entered; in that case no
     * counter has been changed.  Failing to return to the previous working
     * directory is fatal, because every later relative path would be wrong.
     */
    int do_open(char *dname, int dtype)
    {
    	DIR *p;
    	char lastdir[PATH_MAX];
    	char curpath[PATH_MAX];
    	char lastpath[PATH_MAX];
    	struct stat l;
    	struct dirent *t;
    	int filetype;
    	int saved_errno;

    	if ((p = opendir(dname)) == NULL)
    		return 1;

    	/* always snapshot fullpath so the restore at the end never copies
    	 * from an uninitialised buffer (both arrays are PATH_MAX bytes)
    	 */
    	strcpy(lastpath, fullpath);

    	/* save our last current directory, so that when we leave do_open()
    	 * we return to it with chdir()
    	 */
    	if (getcwd(lastdir, sizeof lastdir) == NULL)
    		goto fail;

    	/* a command-line directory needs its own stat(); do it before
    	 * chdir() because dname may be relative to the current directory
    	 */
    	if (dtype != SUB_DIR && stat(dname, &l) == -1)
    		goto fail;

    	if (chdir(dname) == -1)
    		goto fail;

    	/* only touch the counters once we are certain the walk will happen */
    	if (dtype == SUB_DIR)
    		++dircount;
    	else
    		totalbytes = totalbytes + (unsigned long long int) l.st_size;

    	if (verbose) {
    		updt_path_str(curpath, dname);
    		printf("Reading directory %s ... %u files ...\n",
    			fullpath, filecount);
    	}

    	/* process directory at *p */
    	while ((t = readdir(p)) != NULL) {
    		if ((filetype = do_tally(t->d_name)) == -1) {
    			fprintf(stderr, "Couldn't stat %s: %s\n",
    				t->d_name, strerror(errno));
    		} else if (filetype == VALIDIR) {
    			if ((do_open(t->d_name, SUB_DIR)) == 1)
    				fprintf(stderr, "Can't open %s: %s\n",
    					t->d_name, strerror(errno));
    		}
    	}

    	if (chdir(lastdir) == -1) {
    		fprintf(stderr, "Can't return to %s: %s\n",
    			lastdir, strerror(errno));
    		exit(1);
    	}
    	strcpy(fullpath, lastpath);

    	/* every level of recursion holds one open DIR; release it */
    	closedir(p);

    	return 0;

    fail:
    	/* closedir() may overwrite errno, and the caller reports it */
    	saved_errno = errno;
    	closedir(p);
    	errno = saved_errno;
    	return 1;
    }
    			
    /*
     * do_tally - examine one directory entry and add it to the running totals.
     *
     * fname: entry name, resolved against the current working directory.
     *
     * "." and ".." are skipped untouched.  For every other entry its size is
     * added to totalbytes; non-directories also bump filecount.
     *
     * Returns VALIDIR when the entry is a directory (the caller should descend
     * into it), 0 for any other entry, and -1 with errno set if the entry
     * cannot be examined.
     */
    int do_tally(char *fname)
    {
    	struct stat t;

    	/* don't do tallies or stat() for these directories */
    	if (strcmp(fname, ".") == 0 || strcmp(fname, "..") == 0)
    		return 0;

    	/* lstat() looks at the entry itself.  stat() would follow symbolic
    	 * links, so a link to a directory would be walked a second time (or
    	 * forever, for a link that points back up the tree) and the totals
    	 * would disagree with what du reports.
    	 */
    	if (lstat(fname, &t) == -1)
    		return -1;

    	totalbytes = totalbytes + (unsigned long long int) t.st_size;

    	if (S_ISDIR(t.st_mode))
    		return VALIDIR;

    	++filecount;
    	return 0;
    }
    void updt_path_str(char *curpath, char *dname)
    {
    	strcpy(curpath, dname);
    	strcat(curpath, "/");
    	strcat(fullpath, curpath);
    }

  2. #2
    spurious conceit MK27's Avatar
    Join Date
    Jul 2008
    Location
    segmentation fault
    Posts
    8,300
    Quote Originally Posted by geekoftheweek View Post
    Also, sometimes the counts are off on some directories compared to the count the OS (Linux in this case) gives me
    How so? What did you use (du)?
    C programming resources:
    GNU C Function and Macro Index -- glibc reference manual
    The C Book -- nice online learner guide
    Current ISO draft standard
    CCAN -- new CPAN like open source library repository
    3 (different) GNU debugger tutorials: #1 -- #2 -- #3
    cpwiki -- our wiki on sourceforge

  3. #3
    Registered User
    Join Date
    Sep 2008
    Location
    Toronto, Canada
    Posts
    1,834
    Possibly you aren't traversing across hidden or other special files. But I'm not familiar with the options available in Linux when starting to walk directories.

  4. #4
    Banned master5001's Avatar
    Join Date
    Aug 2001
    Location
    Visalia, CA, USA
    Posts
    3,685
    Quote Originally Posted by nonoob View Post
    Possibly you aren't traversing across hidden or other special files. But I'm not familiar with the options available in Linux when starting to walk directories.
    From the code I see, yep. That pretty much sums up the problem.

  5. #5
    Registered User geekoftheweek's Avatar
    Join Date
    Mar 2003
    Location
    maine
    Posts
    8
    Thanks, everyone! I think the problem may be that I'm not handling links correctly. It's strange, because it works on most directories without special files. Being sort of naive about Linux file types and system programming, I may have to do a little more research.

    The main reason I wrote this little program is to understand directory traversal, so that I can make a duplicate-file finder and similar utilities. Basically, I think
    the best way to recurse through directories is the way I'm doing it now: a readdir() loop that calls the same function recursively whenever it arrives at
    another directory.

    But, I program because it's fun and is a challenge!

    Thanks
    Last edited by geekoftheweek; 10-20-2008 at 06:51 PM.

  6. #6
    Banned master5001's Avatar
    Join Date
    Aug 2001
    Location
    Visalia, CA, USA
    Posts
    3,685
    I give it two thumbs way up. And I want to give a shout-out to geekoftheweek -- oh, who am I kidding, you are officially promoted to geekofthemonth. Get up on stage, you big knucklehead. Give the people what they want.

  7. #7
    Registered User
    Join Date
    Sep 2008
    Location
    Toronto, Canada
    Posts
    1,834
    I'm not going to agree that the best way is with recursion... Anything you can do with calling a function and stacking local variables can likely be better, faster, more controlled by managing your own "stack" array... with less overhead.

    Whenever an exercise was expressed as recursion, either because it's particularly elegant that way, or it was to introduce recursive function calls in programming, I have always found it to be less than optimum from a performance perspective.

    Definitely fun & challenge! Especially when you can exceed the performance of "built in" functions in some cases.

Popular pages Recent additions subscribe to a feed