> If you add new lines later, those lines would come to the right place in balanced binary tree
True, but keeping a tree balanced, and finding the right place are not free actions.
> As Micko suggested, multimaps are optimized for both insertion and lookup
But this application is a single insert and a single sequential lookup. You don't see the benefit of many random accesses (which would obviously suck on a vector).
But then again, the original example sorted on the second field of a line, which would need a more complex comparison function in the case of a vector sort.
Here's my simple test - the input file was the bash manual page.
Code:
#include <iostream>
#include <string>
#include <fstream>
#include <map>
#include <algorithm>
#include <vector>
#include <ctime>
using namespace std;
// use a map
void test1 ( const char *inFile, const char *outFile ) {
multimap < string, string > cont;
string value;
ifstream file (inFile);
while (file >> value) {
cont.insert (make_pair(value, value));
}
multimap <string, string >::iterator it;
ofstream out_file (outFile);
for (it = cont.begin(); it != cont.end(); ++it) {
out_file << it->second << endl;
}
}
// use a vector
void test2 ( const char *inFile, const char *outFile ) {
vector < string > cont;
string value;
ifstream file (inFile);
while (file >> value) {
cont.push_back (value);
}
sort( cont.begin(), cont.end() );
vector < string >::iterator it;
ofstream out_file (outFile);
for (it = cont.begin(); it != cont.end(); ++it) {
out_file << *it << endl;
}
}
// just the I/O overhead
void test3 ( const char *inFile, const char *outFile ) {
string value;
ifstream file (inFile);
ofstream out_file (outFile);
while (file >> value) {
out_file << value << endl;
}
}
int main ( ) {
clock_t t1, t2, t3, t4;
t1 = clock();
test1( "tmp.txt", "tmp1.txt" );
t2 = clock();
test2( "tmp.txt", "tmp2.txt" );
t3 = clock();
test3( "tmp.txt", "tmp3.txt" );
t4 = clock();
cout << "Test1 " << t2 - t1 << endl;
cout << "Test2 " << t3 - t2 << endl;
cout << "Test3 " << t4 - t3 << endl;
return 0;
}
$ g++ -W -Wall -ansi -pedantic -O2 foo.cpp
$ ./a.out
Test1 470000
Test2 420000
Test3 210000
$ ./a.out
Test1 650000
Test2 510000
Test3 280000
$ ./a.out
Test1 640000
Test2 500000
Test3 280000
$ ./a.out
Test1 550000
Test2 490000
Test3 280000
$ ./a.out
Test1 590000
Test2 500000
Test3 270000
$ wc tmp*.txt
36251 36251 215615 tmp1.txt
36251 36251 215615 tmp2.txt
36251 36251 215615 tmp3.txt
4793 36251 256697 tmp.txt
113546 145004 903542 total
$ diff tmp1.txt tmp2.txt
$