Hi, I'm making a program that takes a website URL and a filename as command-line arguments and then saves the page's HTML code to that file.
I have that working fine, but I'd also like to strip the HTML tags so that it saves only the text of the webpage. I understand that won't work perfectly, but I can't get it to work at all!
I think I need to put in something like this, but I'm unsure where:
Code:
/* When c is a tag delimiter, update in_tag: 1 if c is '<', 0 if c is '>'. */
if (c == '<' || c == '>') {
in_tag = (c == '<') ? 1 : 0;
Here's my full program:
Code:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <curl/curl.h>
/*
 * libcurl write callback: appends the received chunk to the FILE passed
 * in through stream.
 *
 * ptr    - start of the received data
 * size   - size in bytes of one item
 * nmeb   - number of items pointed to by ptr
 * stream - the FILE * to write to
 *
 * Returns the number of BYTES written. fwrite() reports whole items, so the
 * count is scaled by size; returning the bare item count would only match
 * the byte count when size is 1.
 */
size_t write_data(void *ptr, size_t size, size_t nmeb, void *stream) {
    size_t items_written = fwrite(ptr, size, nmeb, (FILE *) stream);
    return items_written * size;
}
int main(int argc, char *argv[]) {
//checks there is the required amount of arguments
if (argc == 3) {
char *getcwd(char *buf, size_t size);
char cwd[1024];
int confirm;
printf("Saving website \"%s\".\n", argv[1]);
printf("To file %s\n\n", argv[2]);
//request save file confirmation from user
printf("Are these details correct? (1 = Yes, 0 = No)\n\n");
scanf("%d", &confirm);
if (confirm == 1) {
//tells the user where the file has been saved
if (getcwd(cwd, sizeof (cwd)) != NULL)
fprintf(stdout, "Document saved in: \"%s\"\n\n", cwd);
//opens file for writing (doesn't need to exist)
FILE * file = (FILE *) fopen(argv[2], "w+");
if (!file) {
perror("File Open:");
exit(0);
}
CURL *handle = curl_easy_init();
//collecting the html from command line specified argument
curl_easy_setopt(handle, CURLOPT_URL, argv[1]);
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_data);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, file);
curl_easy_perform(handle);
curl_easy_cleanup(handle);
}//user chooses not to save
else if (confirm == 0) {
printf("File not saved\n");
return 0;
}//invalid input by user
else {
printf("Incorrect input\n");
return 0;
}
} else {
//showing correct usage of command line argument
printf("Correct usage:\n\n \"./gethtml http://www.example.com filename.txt\"\n\n");
return (0);
}
}
That doesn't include an attempt at stripping the HTML, as I've been trying all day and I'm clueless right now. Any help is much appreciated.