#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libxml/HTMLparser.h>
#include <libxml/uri.h>
#include <curl/curl.h>
/*
 * Growable, NUL-terminated byte buffer used to accumulate a response body.
 *
 * memory  heap buffer (may be NULL before the first append); after every
 *         successful append it holds size + 1 bytes, the last one being '\0'.
 * size    number of payload bytes stored, not counting the trailing '\0'.
 */
struct MemoryStruct {
    char *memory;
    size_t size;
};

/*
 * Write callback in the shape libcurl expects for CURLOPT_WRITEFUNCTION:
 * appends size * nmemb bytes from `contents` to the struct MemoryStruct
 * passed as `userp` and keeps the buffer NUL-terminated.
 *
 * Returns the number of bytes consumed (size * nmemb) on success, or 0 when
 * the buffer cannot be grown. On failure the previously accumulated data in
 * `userp` is left untouched and still owned by the caller.
 */
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
                                  void *userp)
{
    struct MemoryStruct *mem = userp;

    /* Reject chunk sizes whose product would wrap around. */
    if (nmemb != 0 && size > SIZE_MAX / nmemb) {
        return 0;
    }
    size_t realsize = size * nmemb;

    /* mem->size + realsize + 1 must also be representable. */
    if (realsize >= SIZE_MAX - mem->size) {
        return 0;
    }

    /*
     * Grow through a temporary so the old buffer is neither lost nor leaked
     * when realloc fails.
     */
    char *grown = realloc(mem->memory, mem->size + realsize + 1);
    if (grown == NULL) {
        return 0;
    }
    mem->memory = grown;

    if (realsize != 0) {
        memcpy(mem->memory + mem->size, contents, realsize);
    }
    mem->size += realsize;
    mem->memory[mem->size] = '\0';

    return realsize;
}
void download_file(const char *url, const char *filename) {
FILE *fp = fopen(filename, "wb");
if (!fp) return;
CURL *curl = curl_easy_init();
if (!curl) return;
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp);
curl_easy_perform(curl);
curl_easy_cleanup(curl);
fclose(fp);
}
/* Upper bound (including the terminating NUL) for a derived local file name. */
enum { PDF_FILENAME_MAX = 256 };

/*
 * Return the node that follows `node` in a depth-first, document-order walk
 * of the subtree rooted at `root`, or NULL once the subtree is exhausted.
 * Only element nodes are descended into.
 */
static xmlNode *pdf_next_node_in_subtree(xmlNode *node, const xmlNode *root)
{
    if (node->type == XML_ELEMENT_NODE && node->children != NULL) {
        return node->children;
    }
    /* Climb until a following sibling exists, never leaving the subtree. */
    while (node != NULL && node != root) {
        if (node->next != NULL) {
            return node->next;
        }
        node = node->parent;
    }
    return NULL;
}

/*
 * Copy the last path segment of `url` into `out`, ignoring any query string
 * ('?...') or fragment ('#...').
 * Returns 0 on success, -1 when the URL has no '/', the segment is empty, or
 * it does not fit into `out_size` bytes.
 */
static int pdf_filename_from_url(const char *url, char *out, size_t out_size)
{
    size_t path_len = strcspn(url, "?#");
    size_t start = path_len;

    while (start > 0 && url[start - 1] != '/') {
        start--;
    }
    if (start == 0) {
        return -1;
    }

    size_t name_len = path_len - start;
    if (name_len == 0 || name_len >= out_size) {
        return -1;
    }

    memcpy(out, url + start, name_len);
    out[name_len] = '\0';
    return 0;
}

/*
 * Parse `html`, and for every <a> element whose href contains ".pdf",
 * resolve the link against `base_url` and download it into the current
 * directory under the last path segment of the resolved URL.
 *
 * html      NUL-terminated HTML text; NULL is ignored.
 * base_url  URL the document was fetched from; used as the document URL for
 *           the parser and as the base for resolving relative links.
 *
 * Links that cannot be resolved or that yield no usable file name are
 * skipped.
 */
void find_and_download_pdfs(const char *html, const char *base_url)
{
    if (html == NULL) {
        return;
    }

    /* htmlReadMemory takes the length as an int. */
    size_t html_len = strlen(html);
    if (html_len > (size_t)INT_MAX) {
        return;
    }

    htmlDocPtr doc = htmlReadMemory(html, (int)html_len, base_url, NULL,
                                    HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
    if (doc == NULL) {
        return;
    }

    xmlNode *root = xmlDocGetRootElement(doc);
    for (xmlNode *cur = root; cur != NULL;
         cur = pdf_next_node_in_subtree(cur, root)) {
        if (cur->type != XML_ELEMENT_NODE || cur->name == NULL ||
            strcmp((const char *)cur->name, "a") != 0) {
            continue;
        }

        xmlChar *href = xmlGetProp(cur, (const xmlChar *)"href");
        if (href == NULL) {
            continue;
        }

        if (strstr((const char *)href, ".pdf") != NULL) {
            /*
             * xmlBuildURI leaves absolute references unchanged and resolves
             * relative ones ("x.pdf", "/x.pdf", "../x.pdf") against the base.
             */
            xmlChar *full_url = xmlBuildURI(href, (const xmlChar *)base_url);
            if (full_url != NULL) {
                char filename[PDF_FILENAME_MAX];
                if (pdf_filename_from_url((const char *)full_url, filename,
                                          sizeof filename) == 0) {
                    download_file((const char *)full_url, filename);
                }
                xmlFree(full_url);
            }
        }

        /* Released for every anchor, whether or not it pointed at a PDF. */
        xmlFree(href);
    }

    xmlFreeDoc(doc);
    /*
     * NOTE(review): kept from the original. libxml2 documents
     * xmlCleanupParser() as a once-at-exit call; confirm nothing else in the
     * program uses libxml2 after this function returns.
     */
    xmlCleanupParser();
}
int main(int argc, char **argv) {
if (argc != 2) {
printf("Usage: %s <url>\n", argv[0]);
return 1;
}
CURL *curl = curl_easy_init();
if (!curl) return 1;
struct MemoryStruct chunk = { malloc(1), 0 };
curl_easy_setopt(curl, CURLOPT_URL, argv[1]);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk);
curl_easy_perform(curl);
curl_easy_cleanup(curl);
find_and_download_pdfs([Link], argv[1]);
free([Link]);
return 0;
}