I give up. So here is my test code along with the runtimes. The file is exactly as I described it above. In the examples below I was interested in the first column and in the number in the second column.
Runtime tests were performed on a 1 GB file.
Line reader:
Code:
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <stdexcept>
using namespace std;
// Splits a line shaped like "<first>\tpg|<number>|..." into its first column
// and the integer that follows "pg|".
//
// Returns true and fills `first` / `number` only when the line has a
// non-empty first column, a tab, the literal "pg|" and a parsable integer.
// On any mismatch it returns false and leaves both outputs untouched.
static bool ParseLine(const std::string& line, std::string& first, int& number){
    const std::string::size_type tab = line.find('\t');
    if (tab == std::string::npos || tab == 0)
        return false;
    // compare() clamps the length, so a short tail simply fails to match.
    if (line.compare(tab + 1, 3, "pg|") != 0)
        return false;
    int parsed = 0;
    try {
        parsed = std::stoi(line.substr(tab + 4));
    } catch (const std::logic_error&) {
        // std::invalid_argument (no digits) and std::out_of_range (does not
        // fit in int) both derive from std::logic_error.
        return false;
    }
    first.assign(line, 0, tab);
    number = parsed;
    return true;
}

// Reads `file` line by line and prints "<first column> <number>" for every
// line that matches "<first>\tpg|<number>|...". Non-matching lines are
// skipped silently.
//
// Throws std::runtime_error if the file cannot be opened.
void ReadFile(const std::string& file){
    std::ifstream fs(file.c_str());
    if (!fs.is_open())
        throw std::runtime_error("Cannot open file: " + file);

    std::string line;
    std::string first;
    int number = 0;
    // Testing getline() itself stops the loop on the failed read, so the
    // previous line is never processed a second time at end of file.
    while (std::getline(fs, line)) {
        if (ParseLine(line, first, number)) {
            // '\n' instead of std::endl: same bytes, but no flush per line.
            std::cout << first << " " << number << '\n';
        }
    }
    // The ifstream destructor closes the file, also on the exception path.
}
// Entry point of the line-reader test: processes the file "testfile" in the
// current directory. A std::runtime_error from ReadFile is reported on
// stderr and turned into a non-zero exit status so callers can detect it.
int main (){
    try{
        ReadFile("testfile");
    }catch(const std::runtime_error& e){
        std::cerr << e.what() << "\n";
        return 1;
    }
    return 0;
}
Runtime:
real 0m45.838s
user 0m30.918s
sys 0m14.437s
Mem reader:
Code:
#include <fcntl.h>
#include <unistd.h>

#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Reads `file` through the POSIX descriptor API in 16 KiB chunks and walks
// over every '\n' found in each chunk. The scan loop has an empty body: it
// only locates line ends and extracts nothing.
//
// Throws std::runtime_error if the file cannot be opened or a read fails.
// The descriptor is closed on every exit path.
void ReadFile(const std::string& file){
    const int fd = ::open(file.c_str(), O_RDONLY);
    if (fd == -1)
        throw std::runtime_error("Cannot open file: " + file);

    // Closes the descriptor when the function returns or throws.
    struct FdGuard {
        int fd;
        ~FdGuard() { ::close(fd); }
    } guard{fd};

    // Use the named constant: the previous literal 1 is POSIX_FADV_RANDOM on
    // Linux, the opposite of the intended sequential-access hint. The call
    // is advisory, so its result is deliberately ignored.
    posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);

    static const std::size_t BUFFER_SIZE = 16 * 1024;
    char buf[BUFFER_SIZE];
    for (;;) {
        // read() returns ssize_t; keep it signed so -1 is an ordinary value.
        const ssize_t bytes_read = ::read(fd, buf, BUFFER_SIZE);
        if (bytes_read < 0)
            throw std::runtime_error("read failed");
        if (bytes_read == 0)
            break;  // end of file

        const char* const end = buf + bytes_read;
        // Jump from newline to newline within the chunk; ++p steps past the
        // one just found, and memchr returns null once none remain.
        for (const char* p = buf;
             (p = static_cast<const char*>(std::memchr(p, '\n', end - p))) != nullptr;
             ++p) {
            // Intentionally empty: end-of-line detection only.
        }
    }
}
// Entry point of the chunked-read test: processes the file "testfile" in the
// current directory. A std::runtime_error from ReadFile is reported on
// stderr and turned into a non-zero exit status so callers can detect it.
int main (){
    try{
        ReadFile("testfile");
    }catch(const std::runtime_error& e){
        std::cerr << e.what() << "\n";
        return 1;
    }
    return 0;
}
Runtime:
real 0m0.280s
user 0m0.100s
sys 0m0.176s
However, as soon as I start extracting information from each line in the second solution, the runtime increases until it is almost equal to the first one (only about 5 seconds faster). Any suggestions on how to improve on this?
thnx
PS
OK, to be fair, if I add the following to the loop:
Code:
// Emits one fixed line per '\n' found in the current chunk.
// '\n' replaces std::endl: the bytes written are the same, but std::endl
// also flushes the stream after every line, which is presumably what
// dominates the runtime of this loop.
for(char *p = buf; (p = (char*) memchr(p, '\n', (buf + bytes_read) - p)); ++p){
    std::cout << "write something " << '\n';
}
Runtime:
real 0m12.260s
user 0m2.252s
sys 0m9.985s