A map<string, set<unsigned> > is what I thought about too.
Here's what it might look like. Note that inserting a keyword and a page number, while making sure that they are in sorted order and unique, takes just one simple line.
Code:
#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <set>
#include <string>
// Returns true when c is a letter or digit in the current C locale.
// The cast to unsigned char is required: passing a negative char value
// to std::isalnum is undefined behaviour.
static bool is_word_char(char c)
{
    return std::isalnum(static_cast<unsigned char>(c)) != 0;
}

// Logical negation of is_word_char, used to locate the end of a word.
static bool is_not_word_char(char c)
{
    return !is_word_char(c);
}

// Reads lines from standard input, prompting each one with its 1-based
// line number, until an empty line or end of input is reached. Every run
// of alphanumeric characters longer than three characters is recorded
// together with the numbers of the lines it appeared on. Finally the index
// is printed, one word per line, as "word: n1 n2 ...".
int main()
{
    // The map keeps the words sorted; the set keeps each word's line
    // numbers sorted and free of duplicates.
    typedef std::map<std::string, std::set<unsigned> > Index;
    Index index;
    std::string line;
    unsigned line_num = 0;

    // enter empty line to stop
    while (std::cout << ++line_num << ": " && std::getline(std::cin, line) && !line.empty()) {
        // tokenize: repeatedly find the next run of alphanumeric characters
        std::string::iterator scan_pos = line.begin();
        for (;;) {
            const std::string::iterator word_start =
                std::find_if(scan_pos, line.end(), is_word_char);
            if (word_start == line.end())
                break; // no further words on this line

            const std::string::iterator word_end =
                std::find_if(word_start, line.end(), is_not_word_char);

            // could be converted to lower-case
            // Only words longer than three characters are indexed.
            if (word_end - word_start > 3)
                index[std::string(word_start, word_end)].insert(line_num);

            scan_pos = word_end;
        }
    }

    // display what we got
    for (Index::const_iterator word_it = index.begin(); word_it != index.end(); ++word_it) {
        std::cout << word_it->first << ':';
        const std::set<unsigned>& lines = word_it->second;
        for (std::set<unsigned>::const_iterator line_it = lines.begin(); line_it != lines.end(); ++line_it) {
            std::cout << ' ' << *line_it;
        }
        std::cout << '\n';
    }
}
However, perhaps if you are going to index once but use the index a lot, you might also just throw the line (page) numbers into a vector and finally sort them and remove duplicates.