xeddiex: Fair enough. I've excluded yours now since your specification is different.
For the other "contestants", I mashed a couple of them slightly to fit them into my test driver. If I screwed up somewhere, please post corrections. I had to fix a couple implementation bugs that caused seg faults or compilation failures, so look closely.
The correctness tests (using Salem's idea) involved an empty file, one with a newline after the last line, one without, one with one very big line, and the two IMDb files for crazy characters and huge input. Funny enough, wc pukes on the IMDb files because of those crazy characters, so I had to count newlines only. Here is the wc output:
Code:
whoie@linux:~/database> wc *.txt *.c
0 0 0 empty.txt
1 6 1785 longline.txt
0 2 10 nonewline.txt
1 2 9 oneline.txt
138 442 2440 lc.c
32 104 917 main.c
172 556 5161 total
whoie@linux:~/database> wc -l *.list
2552802 actors.list
1328278 actresses.list
3881080 total
notice that wc doesn't count lines according to our working line definition. The file without a newline after the last line yields zero.
Here are the contestants:
Code:
#include <stdio.h>
#include <string.h>
/* .......................................... */
int waltp(FILE *fp) {
int nl = 0;
char buf[BUFSIZ] = "";
while ( fgets(buf, sizeof buf, fp) )
if ( strchr(buf, '\n') ) ++nl;
if ( !strchr(buf, '\n') ) ++nl;
return nl;
}
/* .......................................... */
int waltp2(FILE *fp) {
int nl = 0;
int c = 0;
char buf[BUFSIZ] = "";
while ( fgets(buf, sizeof buf, fp) )
{
c = strlen(buf)-1;
if ( buf[c] == '\n' ) ++nl;
}
if ( buf[c] != '\n' ) ++nl;
return nl;
}
/* .......................................... */
int waltp3(FILE *fp) {
int nl = 0;
int buf;
int c;
while ( (buf = fgetc(fp)) != EOF )
{
if ( buf == '\n' ) ++nl;
c = buf;
}
if ( c != '\n' ) ++nl;
return nl;
}
/* .......................................... */
int whoie(FILE *fp) {
int c;
int nl = 0;
while ( (c = getc(fp)) != EOF ) {
++nl;
if ( c != '\n' ) {
fscanf(fp, "%*[^\n]");
getc(fp);
}
}
return nl;
}
/* .......................................... */
int Salem ( FILE *fp ) {
int nlines = 0;
int partial = 0;
char buff[BUFSIZ] = "";
while ( fgets( buff, sizeof(buff), fp ) != NULL ) {
if ( strchr(buff,'\n') ) {
nlines++;
partial = 0;
} else {
partial = 1;
}
}
return nlines += partial;
}
/* .......................................... */
int Dave_Sinkula(FILE *file)
{
int lines = 0;
int ch, prev = '\n' /* so empty files have no lines */;
while ( (ch = getc(file)) != EOF ) /* Read all chars in the file. */
{
if ( ch == '\n' )
{
++lines; /* Bump the counter for every newline. */
}
prev = ch; /* Keep a copy to later test whether... */
}
if ( prev != '\n' ) /* ...the last line did not end in a newline. */
{
++lines; /* If so, add one more to the total. */
}
return lines;
}
/* .......................................... */
size_t
count(const char * str, const char * fnd)
{
size_t cnt = 0, len = strlen(fnd);
while((str = strstr(str, fnd)) != 0)
{
str += len;
++cnt;
}
return cnt;
}
#define MAX 1024
size_t
file_count(FILE *inf, const char * fnd)
{
size_t cnt = 0;
char buf[MAX + 1];
const char * aok = 0;
if(inf)
{
aok = fgets(buf, MAX, inf);
if(aok && *aok)
{
cnt = count(buf, fnd);
while(fgets(buf, MAX, inf) != 0)
{
cnt += count(buf, fnd);
}
}
else
{
aok = 0;
}
}
return aok ? cnt : (size_t)-1;
}
int Sebastiani(FILE *fp)
{
return file_count(fp, "\n") + 1;
}
and the test driver:
Code:
#include <stdio.h>
int whoie(FILE*), waltp(FILE*), waltp2(FILE*)
,waltp3(FILE*), Salem(FILE*), Sebastiani(FILE*), Dave_Sinkula(FILE*);
int main(int argc, char *argv[]) {
FILE *fp = 0;
while ( --argc > 0 && (fp = fopen(*++argv, "r")) != 0 ) {
printf("File: %s\n", *argv);
printf("waltp found %d line(s).\n", waltp(fp));
rewind(fp);
printf("waltp2 found %d line(s).\n", waltp2(fp));
rewind(fp);
printf("waltp3 found %d line(s).\n", waltp3(fp));
rewind(fp);
printf("whoie found %d line(s).\n", whoie(fp));
rewind(fp);
printf("Dave_Sinkula found %d line(s).\n", Dave_Sinkula(fp));
rewind(fp);
printf("Sebastiani found %d line(s).\n", Sebastiani(fp));
rewind(fp);
printf("Salem found %d line(s).\n\n", Salem(fp));
fclose(fp);
}
if ( argc != 0 ) printf("Unable to open %s\n", *argv);
return 0;
}
Here are the correctness test results:
Code:
whoie@linux:~/database> gcc -W -Wall -ansi -pedantic -c *.c
whoie@linux:~/database> gcc -O3 -pg *.c -o lc
whoie@linux:~/database> ./lc *.txt *.c *.list
File: empty.txt
waltp found 1 line(s).
waltp2 found 1 line(s).
waltp3 found 1 line(s).
whoie found 0 line(s).
Dave_Sinkula found 0 line(s).
Sebastiani found 0 line(s).
Salem found 0 line(s).
File: longline.txt
waltp found 1 line(s).
waltp2 found 1 line(s).
waltp3 found 1 line(s).
whoie found 1 line(s).
Dave_Sinkula found 1 line(s).
Sebastiani found 2 line(s).
Salem found 1 line(s).
File: nonewline.txt
waltp found 1 line(s).
waltp2 found 1 line(s).
waltp3 found 1 line(s).
whoie found 1 line(s).
Dave_Sinkula found 1 line(s).
Sebastiani found 1 line(s).
Salem found 1 line(s).
File: oneline.txt
waltp found 1 line(s).
waltp2 found 1 line(s).
waltp3 found 1 line(s).
whoie found 1 line(s).
Dave_Sinkula found 1 line(s).
Sebastiani found 2 line(s).
Salem found 1 line(s).
File: lc.c
waltp found 138 line(s).
waltp2 found 138 line(s).
waltp3 found 138 line(s).
whoie found 138 line(s).
Dave_Sinkula found 138 line(s).
Sebastiani found 139 line(s).
Salem found 138 line(s).
File: main.c
waltp found 32 line(s).
waltp2 found 32 line(s).
waltp3 found 32 line(s).
whoie found 32 line(s).
Dave_Sinkula found 32 line(s).
Sebastiani found 33 line(s).
Salem found 32 line(s).
File: actors.list
waltp found 2552802 line(s).
waltp2 found 2552802 line(s).
waltp3 found 2552802 line(s).
whoie found 2552802 line(s).
Dave_Sinkula found 2552802 line(s).
Sebastiani found 2552803 line(s).
Salem found 2552802 line(s).
File: actresses.list
waltp found 1328278 line(s).
waltp2 found 1328278 line(s).
waltp3 found 1328278 line(s).
whoie found 1328278 line(s).
Dave_Sinkula found 1328278 line(s).
Sebastiani found 1328279 line(s).
Salem found 1328278 line(s).
Finally, just to show the relative importance (or unimportance) of speed in this, here is a flat profile of all contestants plus the standard library:
Code:
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls s/call s/call name
22.74 54.99 54.99 318576683 0.00 0.00 getc
10.83 81.18 26.20 3135302 0.00 0.00 _IO_vfscanf
6.61 97.16 15.98 8 2.00 6.92 Dave_Sinkula
6.36 112.55 15.39 __getclktck
6.20 127.54 14.99 337237166 0.00 0.00 flockfile
5.93 141.87 14.33 8 1.79 6.71 waltp3
5.41 154.95 13.08 __profile_frequency
4.91 166.84 11.89 15669489 0.00 0.00 memchr
4.20 177.00 10.16 15669489 0.00 0.00 memcpy
3.78 186.13 9.13 266321 0.00 0.00 read
3.77 195.25 9.12 337237166 0.00 0.00 funlockfile
3.63 204.02 8.77 7762514 0.00 0.00 index
3.04 211.38 7.36 3135302 0.00 0.00 vfscanf
2.31 216.97 5.59 15525045 0.00 0.00 fgets
2.21 222.30 5.34 15525045 0.00 0.00 _IO_getline_info
2.02 227.19 4.89 7762514 0.00 0.00 strstr
1.51 230.83 3.64 3135326 0.00 0.00 memset
1.36 234.11 3.29 _IO_wfile_overflow
0.95 236.41 2.30 8 0.29 1.58 waltp2
0.74 238.19 1.78 15525045 0.00 0.00 _IO_getline
0.24 238.76 0.57 8 0.07 1.91 waltp
0.21 239.26 0.50 8 0.06 2.01 Sebastiani
0.15 239.63 0.37 3881270 0.00 0.00 strlen
0.14 239.97 0.34 8 0.04 1.88 Salem
0.13 240.27 0.31 3135301 0.00 0.00 _IO_sputbackc
0.12 240.55 0.28 8 0.04 5.17 whoie
0.11 240.81 0.26 3135302 0.00 0.00 fscanf
0.10 241.04 0.23 8 0.03 0.03 fclose
0.06 241.19 0.15 mempcpy
0.06 241.34 0.15 8 0.02 0.02 fopen
0.04 241.44 0.11 266321 0.00 0.00 _IO_file_underflow
0.04 241.55 0.11 __default_morecore
0.03 241.62 0.08 266321 0.00 0.00 _IO_file_read
0.02 241.66 0.04 266321 0.00 0.00 __uflow
0.02 241.70 0.04 64 0.00 0.00 printf
0.01 241.74 0.04 dprintf
0.01 241.77 0.03 266321 0.00 0.00 _IO_switch_to_get_mode
0.01 241.79 0.03 _IO_default_seekoff
0.01 241.81 0.02 266321 0.00 0.00 _IO_default_uflow
0.00 241.82 0.01 48 0.00 0.00 llseek
0.00 241.83 0.01 __libc_register_dlfcn_hook
0.00 241.84 0.01 memmove
0.00 241.84 0.01 71 0.00 0.00 _IO_file_overflow
0.00 241.85 0.01 9 0.00 0.00 _IO_file_stat
0.00 241.85 0.01 _IO_default_underflow
0.00 241.85 0.00 192 0.00 0.00 _IO_new_file_xsputn
0.00 241.85 0.00 128 0.00 0.00 __find_specmb
0.00 241.85 0.00 74 0.00 0.00 __errno_location
0.00 241.85 0.00 65 0.00 0.00 _IO_new_do_write
0.00 241.85 0.00 64 0.00 0.00 _IO_file_write
0.00 241.85 0.00 64 0.00 0.00 new_do_write
0.00 241.85 0.00 64 0.00 0.00 vfprintf
0.00 241.85 0.00 64 0.00 0.00 write
0.00 241.85 0.00 56 0.00 0.00 _itoa_word
0.00 241.85 0.00 48 0.00 0.00 _IO_file_seek
0.00 241.85 0.00 48 0.00 0.00 _IO_file_seekoff
0.00 241.85 0.00 48 0.00 0.00 _IO_seekoff_unlocked
0.00 241.85 0.00 48 0.00 0.00 rewind
0.00 241.85 0.00 24 0.00 0.00 _IO_un_link
0.00 241.85 0.00 17 0.00 0.00 _IO_setb
0.00 241.85 0.00 16 0.00 0.00 _IO_link_in
0.00 241.85 0.00 10 0.00 0.00 _int_malloc
0.00 241.85 0.00 10 0.00 0.00 malloc
0.00 241.85 0.00 9 0.00 0.00 _IO_doallocbuf
0.00 241.85 0.00 9 0.00 0.00 _IO_file_doallocate
0.00 241.85 0.00 9 0.00 0.00 ___fxstat64
0.00 241.85 0.00 9 0.00 0.00 mmap
0.00 241.85 0.00 8 0.00 0.00 _IO_default_finish
0.00 241.85 0.00 8 0.00 0.00 _IO_file_close
0.00 241.85 0.00 8 0.00 0.00 _IO_file_finish
0.00 241.85 0.00 8 0.00 0.00 _IO_file_open
0.00 241.85 0.00 8 0.00 0.00 _IO_new_file_close_it
0.00 241.85 0.00 8 0.00 0.00 _IO_new_file_fopen
0.00 241.85 0.00 8 0.00 0.00 _IO_new_file_init
0.00 241.85 0.00 8 0.00 0.00 _IO_no_init
0.00 241.85 0.00 8 0.00 0.00 _IO_old_init
0.00 241.85 0.00 8 0.00 0.00 _IO_unsave_markers
0.00 241.85 0.00 8 0.00 0.00 __fopen_internal
0.00 241.85 0.00 8 0.00 0.00 __fopen_maybe_mmap
0.00 241.85 0.00 8 0.00 0.00 _int_free
0.00 241.85 0.00 8 0.00 0.00 cfree
0.00 241.85 0.00 8 0.00 0.00 getenv
0.00 241.85 0.00 8 0.00 0.00 munmap
0.00 241.85 0.00 8 0.00 0.00 open
0.00 241.85 0.00 8 0.00 0.00 sYSTRIm
0.00 241.85 0.00 8 0.00 0.00 strncmp
0.00 241.85 0.00 3 0.00 0.00 __cxa_atexit
0.00 241.85 0.00 3 0.00 0.00 __new_exitfn
0.00 241.85 0.00 1 0.00 0.00 _IO_default_xsputn
0.00 241.85 0.00 1 0.00 0.00 __init_misc
0.00 241.85 0.00 1 0.00 0.00 __libc_csu_fini
0.00 241.85 0.00 1 0.00 0.00 __libc_csu_init
0.00 241.85 0.00 1 0.00 0.00 __libc_init_first
0.00 241.85 0.00 1 0.00 0.00 __libc_init_secure
0.00 241.85 0.00 1 0.00 0.00 __libc_setup_tls
0.00 241.85 0.00 1 0.00 0.00 __libc_sigaction
0.00 241.85 0.00 1 0.00 209.76 __libc_start_main
0.00 241.85 0.00 1 0.00 0.00 __pthread_initialize_minimal
0.00 241.85 0.00 1 0.00 0.00 __setfpucw
0.00 241.85 0.00 1 0.00 0.00 _dl_aux_init
0.00 241.85 0.00 1 0.00 0.00 _dl_important_hwcaps
0.00 241.85 0.00 1 0.00 0.00 _dl_init_paths
0.00 241.85 0.00 1 0.00 0.00 _dl_non_dynamic_init
0.00 241.85 0.00 1 0.00 0.00 _mcleanup
0.00 241.85 0.00 1 0.00 0.00 atexit
0.00 241.85 0.00 1 0.00 0.00 exit
0.00 241.85 0.00 1 0.00 0.00 fini
0.00 241.85 0.00 1 0.00 0.00 init
0.00 241.85 0.00 1 0.00 209.76 main
0.00 241.85 0.00 1 0.00 0.00 moncontrol
0.00 241.85 0.00 1 0.00 0.00 setitimer
0.00 241.85 0.00 1 0.00 0.00 sigaction
0.00 241.85 0.00 1 0.00 0.00 strrchr
0.00 241.85 0.00 1 0.00 0.00 uname
Look closely at the entry for my solution:
Code:
% cumulative self self total
time seconds seconds calls s/call s/call name
0.12 240.55 0.28 8 0.04 5.17 whoie
Notice that the time spent in my function alone is the least (for this run), but that the fscanf call totally overwhelms it! The number of seconds spent total is 5.17, and looking at the scanf family calls in the profile list, they are all ranked well above the cummulative total for fgets (and more than one contestant uses fgets)!
In the end, it appears that Dave_Sinkula, Salem, and whoie pass all correctness tests for our working definition of a line. The other entries probably only need minor corrections to pass all tests. Salem's is the clear choice for efficiency. Nice job everyone! Congratulations Salem!
HTH,
Will