Some results from a 5-million-line file, where each line has the form node1|node2|weight.
Code:
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
using namespace std;
// Size of the line buffer in main, and the count passed to every getline call
// (each getline stores at most SIZE - 1 characters plus the terminating '\0').
const int SIZE = 100;
// Placeholder consumer for one parsed record: truncates both node strings to
// empty in place. The weight is received by value, so nothing done to it here
// is visible to the caller.
void process ( char *node1, char *node2, int w ) {
    (void) w;
    node1[0] = '\0';
    node2[0] = '\0';
}
int main ( ) {
ifstream fd("tmp");
char my_string[SIZE];
while (fd.getline(my_string, SIZE)) {
char node1[100], node2[100]; int Weight;
istringstream fichlin(my_string);
fichlin.getline(node1, SIZE, '|');
fichlin.getline(node2, SIZE, '|');
fichlin >> Weight;
process( node1, node2, Weight );
}
return 0;
}
# Using standard C++
$ g++ -W -Wall -ansi -pedantic -O2 foo.cpp
$ time ./a.out
real 0m23.032s
user 0m16.821s
sys 0m0.457s
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Placeholder consumer for one parsed record: makes both node strings empty
 * by writing a terminator at their first byte. The weight is passed by value
 * and is deliberately ignored.
 */
void process ( char *node1, char *node2, int w ) {
    (void) w;
    node1[0] = '\0';
    node2[0] = '\0';
}
/*
 * Read the file "tmp" line by line with fgets, split each line in place into
 * node1|node2|weight with strtok, and pass the fields to process().
 *
 * Returns EXIT_FAILURE if the file cannot be opened, 0 otherwise. Lines that
 * lack any of the three fields are skipped, because strtok returns NULL for
 * a missing token and both atoi and process would dereference it.
 */
int main ( ) {
    char buff[BUFSIZ];
    FILE *fp = fopen("tmp","r");

    if ( fp == NULL ) {
        perror("tmp");
        return EXIT_FAILURE;
    }

    while ( fgets( buff, sizeof buff, fp ) != NULL ) {
        char *node1 = strtok(buff,"|");
        char *node2 = strtok(NULL,"|");
        char *weight_text = strtok(NULL,"\n");

        if ( node1 == NULL || node2 == NULL || weight_text == NULL ) {
            continue;   /* malformed line: not enough fields */
        }
        process( node1, node2, atoi(weight_text) );
    }
    fclose(fp);
    return 0;
}
# Using standard C
$ gcc -W -Wall -ansi -pedantic -O2 bar.c
$ time ./a.out
real 0m15.179s
user 0m5.492s
sys 0m0.431s
#include <stdlib.h>
#include <stdio.h>
/*
 * Read the file "tmp" in BUFSIZ-byte blocks with fread and discard the data;
 * this measures the cost of reading alone, with no parsing.
 *
 * Returns EXIT_FAILURE if the file cannot be opened, 0 otherwise.
 */
int main ( ) {
    char buff[BUFSIZ];
    FILE *fp = fopen("tmp","r");

    /* fread and fclose must not be called with a null stream. */
    if ( fp == NULL ) {
        perror("tmp");
        return EXIT_FAILURE;
    }

    while ( fread ( buff, 1, sizeof buff, fp ) != 0 ) {
        /* data intentionally ignored */
    }
    fclose(fp);
    return 0;
}
# Using standard C API to read blocks of data
$ gcc -W -Wall -ansi -pedantic -O2 foo.c
$ time ./a.out
real 0m12.633s
user 0m0.022s
sys 0m0.448s
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/*
 * Read the file "tmp" in BUFSIZ-byte blocks with the POSIX read() call and
 * discard the data; this measures the cost of reading alone, with no parsing.
 *
 * Returns 1 if the file cannot be opened or a read fails, 0 otherwise.
 */
int main ( ) {
    char buff[BUFSIZ];
    ssize_t got;
    int status = 0;
    int fd = open("tmp", O_RDONLY );

    if ( fd < 0 ) {
        perror("tmp");
        return 1;
    }

    while ( ( got = read( fd, buff, BUFSIZ ) ) > 0 ) {
        /* data intentionally ignored */
    }
    /* read() returns -1 on error and 0 at end of file; tell them apart. */
    if ( got < 0 ) {
        perror("read");
        status = 1;
    }
    close( fd );
    return status;
}
# Using implementation-specific API to read blocks of data
$ gcc -W -Wall -ansi -pedantic -O2 baz.c
$ time ./a.out
real 0m12.210s
user 0m0.015s
sys 0m0.510s
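I didn't bother to write the tokeniser for the last two cases.
But as Hunter2 says, disk I/O is expensive, and it's hard to get away from that cost: even the bare block reads take about 12 seconds of real time, so faster parsing can only trim the remainder.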