My recent posts have revolved around this concept.
The input is a text file (HTML, for example), and the result needs to be a list of distinct words, each with its word count.
Counting distinct words is easy with a map. The problem I had was that I didn't have an easy way to drop symbols and numbers. That's when I came across this TokenIterator code.
I modified it from an input source to a stringstream to fit my need.
The last part of the code (see the notes in the code) populates a vector with every word that wordIter produces, duplicates included. Instead of a vector, I want to write the words from wordIter into a map so that I get a distinct list.
How can I copy wordIter, which is the collection of data, to my map? I don't think I can loop through it with a traditional mapname[value]++.
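NOTE: wordIter is a TokenIterator; its constructor takes a predicate argument (pred, stored in the member predicate) that decides which characters belong to a word.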
Code:
//: C20:TokenIteratorTest.cpp
#include <fstream>
#include <iostream>
#include <vector>
#include "TokenIterator.h"
#include <sstream>
#include <map>
using namespace std;
// Splits a sample text into words, prints every word on its own line, and
// then prints each distinct word together with the number of times it occurs.
int main() {
  // Distinct words mapped to their occurrence count.
  std::map<std::string, int> freq;

  // The text is passed in as a stream.
  std::stringstream oss;
  // Example text.
  oss << "bunch of words";

  typedef std::istreambuf_iterator<char> IsbIt;
  IsbIt begin(oss), isbEnd;

  // Every character listed here separates words; all other characters are
  // kept as part of a word.
  Delimiters delimiters(" \t\n~;()\"<>:{}[]+-=&*#.,/\\0123456789");

  // wordIter yields one word per increment; the default-constructed `end`
  // is the end-of-sequence sentinel.
  TokenIterator<IsbIt, Delimiters> wordIter(begin, isbEnd, delimiters), end;

  // Collect every word (duplicates included) and print the raw list.
  std::vector<std::string> wordlist;
  std::copy(wordIter, end, std::back_inserter(wordlist));
  std::ostream_iterator<std::string> out(std::cout, "\n");
  std::copy(wordlist.begin(), wordlist.end(), out);

  // Build the distinct list with counts. The counts are taken from wordlist
  // rather than from wordIter because the underlying istreambuf_iterator is
  // single-pass: the stream has already been consumed by the copy above.
  // To skip the vector entirely, loop over the token iterator instead:
  //   for (; wordIter != end; ++wordIter) ++freq[*wordIter];
  for (std::vector<std::string>::const_iterator w = wordlist.begin();
       w != wordlist.end(); ++w) {
    ++freq[*w];  // operator[] inserts a zero count the first time a word is seen
  }

  // Print "word: count" for every distinct word, in the map's sorted order.
  for (std::map<std::string, int>::const_iterator entry = freq.begin();
       entry != freq.end(); ++entry) {
    std::cout << entry->first << ": " << entry->second << '\n';
  }
}
//: C20:TokenIterator.h
#ifndef TOKENITERATOR_H
#define TOKENITERATOR_H
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <iterator>
#include <string>
// Predicate that reports whether a character is alphabetic.
// It is the default token predicate: characters for which it returns true
// are treated as part of a word.
struct Isalpha {
  // Returns true when c is an alphabetic character in the current C locale.
  bool operator()(char c) const {
    // The <cctype> classifiers require a value representable as unsigned
    // char (or EOF); passing a negative plain char is undefined behaviour,
    // so convert before the call.
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
  }
};
// Predicate built from a set of delimiter characters.
// Note the sense of the result: operator() returns true for characters that
// are NOT delimiters, i.e. for characters that belong to a token.
class Delimiters {
  std::string exclude;  // characters that separate tokens
public:
  // With no delimiters every character counts as part of a token.
  Delimiters() {}
  // excl: the characters to treat as delimiters.
  Delimiters(const std::string& excl)
    : exclude(excl) {}
  // Returns true when c is not one of the delimiter characters.
  // Declared const because it only reads the delimiter set.
  bool operator()(char c) const {
    return exclude.find(c) == std::string::npos;
  }
};
// Input iterator that splits a character sequence [begin, end) into tokens.
// A token is a maximal run of characters for which the predicate returns
// true; characters for which it returns false are skipped as separators.
// A default-constructed TokenIterator serves as the end-of-sequence sentinel.
template <class InputIter, class Pred = Isalpha>
class TokenIterator {
  InputIter first;    // current read position in the underlying sequence
  InputIter last;     // end of the underlying sequence
  std::string word;   // token produced by the most recent increment
  Pred predicate;     // true for characters that belong to a token
  // True once the source is consumed and no token is pending.
  bool exhausted() const {
    return word.size() == 0 && first == last;
  }
public:
  // Iterator traits, declared directly because std::iterator is deprecated
  // since C++17. pointer/reference describe what operator-> and operator*
  // actually return.
  typedef std::input_iterator_tag iterator_category;
  typedef std::string value_type;
  typedef std::ptrdiff_t difference_type;
  typedef const std::string* pointer;
  typedef std::string reference;

  // Starts tokenizing [begin, end) and immediately reads the first token.
  TokenIterator(InputIter begin, InputIter end,
                Pred pred = Pred())
    : first(begin), last(end), predicate(pred) {
    ++*this;
  }
  // End sentinel. The members are value-initialized so that first == last
  // is well defined even when InputIter is a raw pointer.
  TokenIterator() : first(), last(), predicate() {}
  // Prefix increment: skip separators, then collect the next token.
  // Leaves word empty when the sequence holds no further token.
  TokenIterator& operator++() {
    word.resize(0);
    first = std::find_if(first, last, predicate);
    while (first != last && predicate(*first))
      word += *first++;
    return *this;
  }
  // Postfix increment returns a proxy holding the token that was current
  // before the advance, so that *it++ yields the old token.
  class Proxy {
    std::string word;
  public:
    Proxy(const std::string& w) : word(w) {}
    std::string operator*() const { return word; }
  };
  Proxy operator++(int) {
    Proxy d(word);
    ++*this;
    return d;
  }
  // Produce the actual value (a copy of the current token):
  std::string operator*() const { return word; }
  // Member access on the current token. Points at the stored token; the
  // pointer is only meaningful until the next increment.
  const std::string* operator->() const {
    return &word;
  }
  // Compare iterators. Two iterators are equal when both are exhausted, or
  // when neither is and both hold the same token at the same position.
  // This keeps `it == end` and `end == it` consistent with each other.
  bool operator==(const TokenIterator& rv) const {
    const bool lhsDone = exhausted();
    const bool rhsDone = rv.exhausted();
    if (lhsDone || rhsDone)
      return lhsDone == rhsDone;
    return first == rv.first && word == rv.word;
  }
  bool operator!=(const TokenIterator& rv) const {
    return !(*this == rv);
  }
};
#endif // TOKENITERATOR_H ///:~