Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include <time.h>
#include <utime.h>
#define CURL_STATICLIB
#include <curl/curl.h>
#include <curl/types.h>
#include <curl/easy.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Upper bound on how many <item> elements of a feed are handed to parseItem. */
int maxEpisodes = 5;
/* Age window in days: an episode published more than this many days before
 * the feed's own pubDate is reported as too old and skipped. */
int maxDays = 1;
/* Number of seconds in a day; multiplied by maxDays to form the age cutoff. */
int secondsPerDay = 86400;
/* Directory episodes are downloaded into; parseItem creates it when stat()
 * reports it missing. */
char *podcastDest = "./podcasts";
/* Not referenced by the code visible in this file.
 * NOTE(review): presumably the base name for a saved feed thumbnail - confirm. */
char *podcastThumbnailFilename = "folder";
/* strptime() patterns tried in order until one matches a pubDate string. */
const char *timeFormat[4] = {
"%a, %d %b %Y %T %z",
"%a, %d %b %Y %T %Z",
"%d %b %Y %T %Z",
"%d %b %Y %T %z"};
/*
 * Write callback installed via CURLOPT_WRITEFUNCTION: forwards the received
 * chunk (nmemb items of size bytes starting at ptr) to stream.
 *
 * Returns whatever fwrite() reports, i.e. the number of items written.
 */
size_t writeFile(void *ptr, size_t size, size_t nmemb, FILE *stream) {
    return fwrite(ptr, size, nmemb, stream);
}
/*
 * Download fileURL into the local file fileName using libcurl.
 *
 * Redirects are followed (at most 10) with the Referer header set
 * automatically on each hop. Progress and errors are reported on
 * stdout/stderr.
 *
 * fileDate and podcastTitle are accepted for interface compatibility but are
 * not used by the transfer.
 *
 * Returns 0 when the transfer completed and the file was closed cleanly,
 * -1 otherwise. On failure the (partial) output file is removed.
 */
int downloadFile(char *fileURL, char *fileName, time_t fileDate, xmlChar *podcastTitle) {
    (void)fileDate;
    (void)podcastTitle;

    printf("Downloading %s (%s)\n", fileName, fileURL);

    CURL *curl = curl_easy_init();
    if (curl == NULL) {
        fprintf(stderr, "Could not initialise libcurl\n");
        return -1;
    }

    FILE *fp = fopen(fileName, "wb");
    if (fp == NULL) {
        /* Without this check the write callback and fclose() would be
         * handed a NULL stream. */
        perror(fileName);
        curl_easy_cleanup(curl);
        return -1;
    }

    curl_easy_setopt(curl, CURLOPT_URL, fileURL);
    /* Numeric options are read as long through varargs, so the argument
     * must really be a long (not a string or a plain int). */
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); // follow redirects
    curl_easy_setopt(curl, CURLOPT_AUTOREFERER, 1L);    // set referer on redirect
    curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 10L);     // stop after 10 redirects
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeFile);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);

    int status = 0;
    CURLcode res = curl_easy_perform(curl);
    if (CURLE_OK == res) {
        double val;
        /* check for bytes downloaded */
        res = curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &val);
        if ((CURLE_OK == res) && (val > 0)) {
            printf("Data downloaded: %0.0f bytes.\n", val);
        }
    } else {
        fprintf(stderr, "Error while fetching '%s' : %s\n", fileURL, curl_easy_strerror(res));
        status = -1;
    }
    curl_easy_cleanup(curl);

    /* fclose() flushes buffered data, so a failure here means the file on
     * disk is incomplete. */
    if (fclose(fp) != 0) {
        perror(fileName);
        status = -1;
    }

    if (status != 0) {
        /* parseItem skips any episode whose file already exists, so a
         * partial file left behind would never be downloaded again. */
        remove(fileName);
    }
    return status;
}
void parseItem (xmlDocPtr doc, xmlNodePtr cur, xmlChar *podcastTitle, xmlChar *podcastPubdate) {
xmlChar *episodeTitle;
xmlChar *episodeURL;
xmlChar *episodePubdate;
cur = cur->xmlChildrenNode;
xmlAttrPtr attr;
struct tm podcastPubdateTM;
struct tm episodePubdateTM;
int newinteger;
time_t podcastPubdateMK;
time_t episodePubdateMK;
int timeFormatTotal = sizeof(timeFormat)/sizeof(char *);
int timeFormatIdentified = 0;
int timeFormatIndex = 0;
do {
if(strptime(podcastPubdate, timeFormat[timeFormatIndex],&podcastPubdateTM) != NULL)
timeFormatIdentified = 1;
timeFormatIndex++;
} while (timeFormatIdentified == (int) 0 && timeFormatIndex < timeFormatTotal);
if (timeFormatIdentified == (int) 0 ) {
fprintf(stderr,"Could not identify the time format!\n");
exit (1);
}
timeFormatIdentified = 0;
timeFormatIndex = 0;
while (cur != NULL) {
if ((!xmlStrcmp(cur->name, (const xmlChar *)"title"))){
episodeTitle = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
} else if ((!xmlStrcmp(cur->name, (const xmlChar *)"pubDate"))) {
episodePubdate = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1);
do {
if(strptime(episodePubdate, timeFormat[timeFormatIndex],&episodePubdateTM) != NULL)
timeFormatIdentified = 1;
timeFormatIndex++;
} while (timeFormatIdentified == (int) 0 && timeFormatIndex < timeFormatTotal);
if (timeFormatIdentified == (int) 0 ) {
fprintf(stderr,"Could not identify the time format!\n");
exit (1);
}
podcastPubdateMK = mktime(&podcastPubdateTM);
episodePubdateMK = mktime(&episodePubdateTM);
if(episodePubdateMK < (podcastPubdateMK - (maxDays*secondsPerDay))){
printf("Episode is too old: %s\n",episodeTitle);
return;
}
} else if ((!xmlStrcmp(cur->name, (const xmlChar *)"enclosure"))) {
for(attr = cur->properties; NULL != attr; attr = attr->next) {
if ((!xmlStrcmp(attr->name, (const xmlChar *)"url"))) {
episodeURL = xmlNodeListGetString(doc, attr->children, 1);
}
}
}
cur = cur->next;
}
char *fileNameFormat = "%s/%s";
size_t episodeFileNameLength;
size_t podcastDestLength;
char *episodeFileName = "";
// Abort this function if not all required data is passed.
if(!episodeTitle || !episodePubdate || !episodeURL) {
printf("Could not collect all required data\n");
return;
}
// Check if target directory exists, if not: create it.
struct stat checkDir;
if(!stat(podcastDest,&checkDir) == 0)
mkdir(podcastDest,0777);
// extract file name from url
char *token;
char *search = "/";
xmlChar *episodeURLCpy;
strcpy(episodeURLCpy, episodeURL);
token = strtok(episodeURLCpy, search);
do {
episodeFileName = token;
token = strtok(NULL, search);
} while (token != NULL);
printf("%s\n", episodeFileName);
// Joining the target directory and filename
episodeFileNameLength = strlen(episodeFileName);
podcastDestLength = strlen(podcastDest);
episodeFileNameLength += podcastDestLength;
char episodeFullFileName[episodeFileNameLength+2];
sprintf(episodeFullFileName,fileNameFormat,podcastDest,episodeFileName);
printf("%s\n", episodeFullFileName);
// Check if file is already downloaded, if so, skip this round.
FILE *episodeFile;
if (episodeFile = fopen(episodeFullFileName, "r"))
{
fclose(episodeFile);
printf("file already exists: %s\n", episodeFileName);
return;
}
downloadFile(episodeURL, episodeFullFileName, episodePubdateMK, podcastTitle);
xmlFree(episodeTitle);
xmlFree(episodePubdate);
xmlFree(episodeURL);
return;
}
/*
 * Parse the RSS document at podcastRssURL and process its episodes.
 *
 * Only the first <channel> under the <rss> root is examined. Within it the
 * first title, thumbnail url attribute and pubDate encountered are kept, and
 * up to maxEpisodes <item> elements are passed to parseItem.
 *
 * Terminates the process with exit(1) when an <item> is reached before both
 * the channel title and pubDate have been seen. On every other path the
 * collected strings and the parsed document are released before returning.
 */
static void parseRSS(char *podcastRssURL) {
    xmlChar *podcastTitle = NULL;
    xmlChar *podcastThumbnailURL = NULL;
    xmlChar *podcastPubdate = NULL;
    int itemNo = 1;

    xmlDocPtr doc = xmlParseFile(podcastRssURL);
    if (doc == NULL ) {
        fprintf(stderr,"Could not parse this document,\n");
        return;
    }

    xmlNodePtr root = xmlDocGetRootElement(doc);
    if (root == NULL) {
        fprintf(stderr,"This podcast does not contain anything.\n");
        xmlFreeDoc(doc);
        return;
    }
    if (!xmlStrEqual(root->name, (const xmlChar *) "rss")) {
        fprintf(stderr,"This podcast does not seem to be valid.");
        xmlFreeDoc(doc);
        return;
    }

    /* Locate the first <channel>; later ones are ignored. */
    xmlNodePtr channel = root->xmlChildrenNode;
    while (channel != NULL && xmlStrcmp(channel->name, (const xmlChar *)"channel") != 0) {
        channel = channel->next;
    }

    /* A dedicated cursor keeps the channel walk from clobbering the outer
     * position, so control always reaches the cleanup below. */
    xmlNodePtr node = (channel != NULL) ? channel->xmlChildrenNode : NULL;
    for (; node != NULL; node = node->next) {
        if ((!xmlStrcmp(node->name, (const xmlChar *)"item")) && maxEpisodes >= itemNo) {
            if (!podcastTitle || !podcastPubdate) {
                fprintf(stderr,"Could not find all required info in sheet. \n");
                exit (1);
            }
            parseItem (doc, node, podcastTitle, podcastPubdate);
            itemNo++;
        } else if ((!xmlStrcmp(node->name, (const xmlChar *)"title")) && podcastTitle == NULL) {
            /* "== NULL": keep only the first title instead of overwriting
             * (and leaking) it on every further match. */
            podcastTitle = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
            printf("Podcast Title: %s\n", podcastTitle);
        } else if ((!xmlStrcmp(node->name, (const xmlChar *)"thumbnail")) && podcastThumbnailURL == NULL) {
            for (xmlAttrPtr attr = node->properties; attr != NULL; attr = attr->next) {
                if (!xmlStrcmp(attr->name, (const xmlChar *)"url") && podcastThumbnailURL == NULL) {
                    podcastThumbnailURL = xmlNodeListGetString(doc, attr->children, 1);
                    printf("Thumbnail location: %s\n", podcastThumbnailURL);
                }
            }
        } else if ((!xmlStrcmp(node->name, (const xmlChar *)"pubDate")) && podcastPubdate == NULL) {
            podcastPubdate = xmlNodeListGetString(doc, node->xmlChildrenNode, 1);
            printf("PubDate: %s\n", podcastPubdate);
        }
    }

    if (podcastTitle != NULL) {
        xmlFree(podcastTitle);
    }
    if (podcastThumbnailURL != NULL) {
        xmlFree(podcastThumbnailURL);
    }
    if (podcastPubdate != NULL) {
        xmlFree(podcastPubdate);
    }
    xmlFreeDoc(doc);
    return;
}
/*
 * Entry point: expects the feed location as the first argument and hands it
 * to parseRSS.
 *
 * Returns EXIT_FAILURE after printing the usage line when no argument was
 * given, EXIT_SUCCESS once parseRSS has returned (parseRSS reports its own
 * errors and has no status to propagate).
 */
int main(int argc, char **argv) {
    if (argc <= 1) {
        /* argv[0] is only guaranteed to exist when argc > 0. */
        printf("Usage: %s docname\n", argc > 0 ? argv[0] : "program");
        return EXIT_FAILURE;
    }
    parseRSS(argv[1]);
    return EXIT_SUCCESS;
}