This code isn't working, and I don't understand why. It is supposed to collect every image and link on the site and report the page on which each one was found, but for some reason it does not produce the expected output. If anyone can help, I would appreciate any tips on how to fix this code.
Code:
#include <iostream>
#include <string>
#include <vector>
#include <curl/curl.h>
#include <regex>
using namespace std;
/// @brief Write callback for libcurl: appends a received chunk to a std::string.
///
/// @param contents Pointer to the received bytes (not NUL-terminated).
/// @param size     Size of one element, in bytes.
/// @param nmemb    Number of elements in this chunk.
/// @param userp    Pointer to the destination std::string (set via CURLOPT_WRITEDATA).
/// @return The number of bytes consumed (size * nmemb) on success, or 0 if the
///         chunk could not be stored; a short count makes libcurl abort the
///         transfer with a write error.
size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
    const size_t total = size * nmemb;
    std::string* const out = static_cast<std::string*>(userp);
    try {
        out->append(static_cast<const char*>(contents), total);
    } catch (...) {
        // This function is invoked from libcurl's C code; letting a C++
        // exception (e.g. std::bad_alloc) unwind through C frames is undefined
        // behaviour, so report the failure through the return value instead.
        return 0;
    }
    return total;
}
string fetch_url(const string& url) {
CURL* curl;
CURLcode res;
string readBuffer;
curl = curl_easy_init();
if(curl) {
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
}
return readBuffer;
}
/// @brief Extracts the `src` attribute of every `<img>` tag found in @p html.
///
/// This is a regex scan, not a real HTML parser: it recognises double-quoted,
/// single-quoted and unquoted attribute values, matches tag and attribute
/// names case-insensitively, and ignores look-alike attributes such as
/// `data-src`. URLs are returned exactly as written in the markup (relative
/// URLs are not resolved against @p page_url).
///
/// @param html     Markup to scan.
/// @param page_url URL of the page the markup came from; stored alongside
///                 each result.
/// @return One (image URL, page URL) pair per matching tag, in document order.
std::vector<std::pair<std::string, std::string>> parse_images(const std::string& html, const std::string& page_url) {
    // Compiled once: constructing a std::regex is expensive.
    // Groups: 1 = double-quoted value, 2 = single-quoted value, 3 = unquoted value.
    // The mandatory \s before "src" keeps "data-src" and similar from matching.
    static const std::regex img_regex(
        R"re(<img\b[^>]*?\ssrc\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s"'>]+)))re",
        std::regex::icase);

    std::vector<std::pair<std::string, std::string>> images;
    const std::sregex_iterator last;
    for (std::sregex_iterator it(html.begin(), html.end(), img_regex); it != last; ++it) {
        const std::smatch& m = *it;
        // Exactly one of the three alternatives participated in the match.
        const std::ssub_match& value = m[1].matched ? m[1] : (m[2].matched ? m[2] : m[3]);
        images.emplace_back(value.str(), page_url);
    }
    return images;
}
/// @brief Extracts the `href` attribute of every `<a>` tag found in @p html.
///
/// This is a regex scan, not a real HTML parser: it recognises double-quoted,
/// single-quoted and unquoted attribute values, matches tag and attribute
/// names case-insensitively, only considers anchor tags (not `<abbr>`,
/// `<article>`, `<area>`, ...), and ignores look-alike attributes such as
/// `data-href`. URLs are returned exactly as written in the markup (relative
/// URLs are not resolved against @p page_url).
///
/// @param html     Markup to scan.
/// @param page_url URL of the page the markup came from; stored alongside
///                 each result.
/// @return One (link URL, page URL) pair per matching tag, in document order.
std::vector<std::pair<std::string, std::string>> parse_links(const std::string& html, const std::string& page_url) {
    // Compiled once: constructing a std::regex is expensive.
    // Groups: 1 = double-quoted value, 2 = single-quoted value, 3 = unquoted value.
    // "<a\b" rejects other tags starting with 'a'; the mandatory \s before
    // "href" keeps "data-href" and similar from matching.
    static const std::regex link_regex(
        R"re(<a\b[^>]*?\shref\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s"'>]+)))re",
        std::regex::icase);

    std::vector<std::pair<std::string, std::string>> links;
    const std::sregex_iterator last;
    for (std::sregex_iterator it(html.begin(), html.end(), link_regex); it != last; ++it) {
        const std::smatch& m = *it;
        // Exactly one of the three alternatives participated in the match.
        const std::ssub_match& value = m[1].matched ? m[1] : (m[2].matched ? m[2] : m[3]);
        links.emplace_back(value.str(), page_url);
    }
    return links;
}
int main() {
string url = "https://www.cprogramming.com";
string html = fetch_url(url);
vector<pair<string, string>> images = parse_images(html, url);
vector<pair<string, string>> links = parse_links(html, url);
cout << "Images found:\n";
for (const auto& img : images) {
cout << "Image URL: " << img.first << ", Page URL: " << img.second << "\n";
}
cout << "\nLinks found:\n";
for (const auto& link : links) {
cout << "Link URL: " << link.first << ", Page URL: " << link.second << "\n";
}
return 0;
}