Mine.
Code:
#include <iostream>
#include <string>
#include <algorithm>
#include <vector>
#include <fstream>
using namespace std;
//one link pulled out of the input text
struct url {
    //the link target exactly as it appeared between the quotes
    std::string address;
    //the tail of address starting at its last '.', e.g. ".wav"
    std::string extension;
};
//predicate handed to find_if by url_end
//true for a single quote or a double quote, false for every other character
bool quotes ( char c ){
    switch ( c ){
        case '\'':
        case '"':
            return true;
        default:
            return false;
    }
}
//finds where a url stops
//a and b delimit the text to scan; a is expected to sit on the first character of the url
//returns the position of the first single or double quote in [a, b), or b if there is none
std::string::iterator url_end ( std::string::iterator a, std::string::iterator b ){
    //walk forward from a until the quotes predicate fires
    return std::find_if( a, b, quotes );
}
//looks for the beginning of the next link target in [a, b)
//a and b delimit the text to scan
//returns an iterator to the first character of the url (just past the opening quote
//that follows "<a href="), or b when no complete link start is present
std::string::iterator url_beg ( std::string::iterator a, std::string::iterator b){
    //present in any link; searching for it makes sure we found a link
    //and not <img src> or anything else
    const std::string link = "<a href=";
    //the url begins one character past the marker: that character is the opening quote
    const std::string::difference_type skip =
        static_cast<std::string::difference_type>( link.size() ) + 1;

    const std::string::iterator i = std::search( a, b, link.begin(), link.end() );
    if ( i == b ){
        return b;
    }
    //the marker may sit at the very end of the text; stepping past it blindly would
    //yield an iterator beyond b, so treat a truncated link as "not found"
    if ( b - i < skip ){
        return b;
    }
    return i + skip;
}
//looks for the file extension in the url
//accepts a string (the url)
//returns everything from the last '.' to the end of the url, dot included
//(".wav" for "music.wav"); returns an empty string when the url has no '.'
//note: the last '.' is used wherever it is, so a url without a file extension
//can still yield text such as ".com/page"
std::string extensions ( const std::string& url ){
    //rfind searches backwards and reports npos instead of running off the
    //front of the string when there is no dot (or the string is empty)
    const std::string::size_type dot = url.rfind( '.' );
    if ( dot == std::string::npos ){
        return std::string();
    }
    return url.substr( dot );
}
//find the urls whithin the string
vector<url> find_urls ( string& link ){
typedef string::iterator iter;
iter a = link.begin(), b = link.end();
//this will hold all of the urls
vector<url> urls;
url add;
//continue until a reaches the end of the string
while ( a != b ){
//set a to the beginning
a = url_beg ( a, b );
//if a doesn't equal the end of the string, a link was found
if ( a != b ){
//creat an iterator to delimit the end of the url
iter c = url_end ( a, b );
//create the string
string d = string ( a, c );
add.address = d;
add.extension = extensions ( d );
urls.push_back( add );
//set a to equal c so we can look through the rest of the string
a = c;
}
}
return urls;
}
int main(){
string link, in, in_file;
cout << "Enter path of the file: ";
getline( cin, in_file );
cout << endl << "Results will output to a file called results.txt" << endl;
ifstream file( in_file.c_str() ); // for testing
ofstream out( "results.txt" );
while ( getline( file, in ) ){
//a string is read into in, and it is added to link to make one big string
link = link + in;
}
//find urls in the link string, put them into a vector
vector<url> urls = find_urls ( link );
if ( urls.empty() ){
cout << "No urls found.";
cin.get();
return 0;
}
else{
cout << "Please enter the extensions you wish to keep ( ex .mp3 ):" << endl;
string ext;
vector<string> extensions;
//input extensions you want to keep
while ( cin >> ext ){
extensions.push_back( ext );
}
vector<url>::iterator i = urls.begin();
//run through each url
while ( i != urls.end() ){
vector<string>::iterator j = extensions.begin();
//check to see if the extension of the current url matches any of
//the ones typed by the user
while ( j != extensions.end() ){
//if we found a match
if ( *j == (*i).extension ){
out << (*i).address << endl;
//no need to walk through the rest
j = extensions.end();
}
else{
//otherwise, check the other extension the user typed
j++;
}
}
//check next url
i++;
}
}
return 0;
}
Nowhere near as short. haha.