#include <iostream>
#include <string>
#include <vector>
#include <curl/curl.h>
#include <regex>
/// @brief Write callback (used with CURLOPT_WRITEFUNCTION) that accumulates
///        received bytes into a std::string.
///
/// @param contents Pointer to the received chunk (not NUL-terminated).
/// @param size     Size of one element in bytes.
/// @param nmemb    Number of elements in the chunk.
/// @param userp    Pointer to the destination std::string (the value given
///                 via CURLOPT_WRITEDATA).
/// @return Number of bytes consumed (size * nmemb) on success; 0 if the
///         append failed, which differs from the chunk size and therefore
///         makes the transfer abort with a write error.
size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) {
    const size_t total = size * nmemb;
    auto* out = static_cast<std::string*>(userp);
    try {
        out->append(static_cast<const char*>(contents), total);
    } catch (...) {
        // This function is invoked from C code: an exception (e.g.
        // std::bad_alloc from append) must not unwind through those frames.
        // Report the failure through the return value instead.
        return 0;
    }
    return total;
}
/// @brief Collects the href value of every `<a ...>` tag found in an HTML string.
///
/// This is a regex scan, not an HTML parser: it recognises double- or
/// single-quoted href attributes on anchor tags, matching tag and attribute
/// names case-insensitively. Unquoted href values are not recognised, and the
/// captured text is returned verbatim (no entity decoding, no URL resolution).
///
/// @param html HTML text to scan.
/// @return The href values in document order; empty if none are found.
std::vector<std::string> ExtractLinks(const std::string& html) {
    // A custom raw-string delimiter is required: the pattern itself contains
    // the sequence )" which would terminate a plain R"( ... )" literal early.
    // Group 1 captures a double-quoted value, group 2 a single-quoted one.
    static const std::regex kAnchorHref(
        R"re(<a\s+(?:[^>]*?\s+)?href\s*=\s*(?:"([^"]*)"|'([^']*)'))re",
        std::regex::ECMAScript | std::regex::icase);

    std::vector<std::string> links;
    const std::sregex_iterator last;
    for (std::sregex_iterator it(html.cbegin(), html.cend(), kAnchorHref); it != last; ++it) {
        const std::smatch& match = *it;
        // Exactly one of the two alternatives participated in the match.
        const std::ssub_match& href = match[1].matched ? match[1] : match[2];
        links.push_back(href.str());
    }
    return links;
}
int main() {
CURL* curl;
CURLcode res;
std::string readBuffer;
curl_global_init(CURL_GLOBAL_DEFAULT);
curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, "https://www.ejemplo.com");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &readBuffer);
res = curl_easy_perform(curl);
curl_easy_cleanup(curl);
if (res == CURLE_OK) {
std::vector<std::string> links = ExtractLinks(readBuffer);
std::cout << "Enlaces encontrados:\n";
for (const auto& link : links) {
std::cout << link << std::endl;
}
} else {
std::cerr << "Error al realizar la solicitud: " << curl_easy_strerror(res) << std::endl;
}
}
curl_global_cleanup();
return 0;
}