Richard W.M. Jones
2023-Feb-07 16:29 UTC
[Libguestfs] [PATCH nbdkit v2 0/4] curl: Use a curl handle pool
NOTE! At least patch 4 should not be applied, and maybe the whole series is a bust. I am mainly posting this on the list for discussion and maybe to archive it. Version 1 was here: https://listman.redhat.com/archives/libguestfs/2023-February/030610.html This patch series introduces the concept of a pool of libcurl handles, instead of always associating one libcurl handle with one NBD connection. This gives you a bit more flexibility, eg. you can have a highly concurrent multi-conn NBD connection, but not overwhelm the remote web server with HTTP connections. Or vice versa. Compared to the earlier version, I have pushed a couple of simple patches from the old series upstream. The remaining patches are tidied up a bit and better tested, but are largely the same. Patches 1-3 on their own are performance neutral in the cases I tested where you have approximately the same number of NBD connections as web server connections (as expected). Patch 4 kills performance, for reasons discussed here: https://listman.redhat.com/archives/libguestfs/2023-February/030618.html I was not able to fix this although I have tried several approaches. Rich.
Richard W.M. Jones
2023-Feb-07 16:29 UTC
[Libguestfs] [PATCH nbdkit v2 1/4] curl: Split out the libcurl handle from the plugin handle
In a future commit we will create a pool of libcurl handles. In order to prepare for that, split out the libcurl handle and associated fields and buffers, from the plugin handle. The plugin handle (for the moment) contains a pointer to the libcurl handle so there is still a 1-1 relationship between them. This is mostly just refactoring/renaming, except for the callback and scripts functions. The callback functions (read_cb etc) only operate on libcurl handles, so pass the libcurl handle (not the plugin handle) as the opaque parameter. The scripts also only care about libcurl handles, so pass the libcurl handle to do_scripts. --- plugins/curl/curldefs.h | 23 +++- plugins/curl/curl.c | 225 +++++++++++++++++++++------------------- plugins/curl/scripts.c | 34 +++--- 3 files changed, 155 insertions(+), 127 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 5b88741d5..05b710ab2 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2014-2020 Red Hat Inc. + * Copyright (C) 2014-2023 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -83,20 +83,37 @@ extern int curl_debug_verbose; /* The per-connection handle. */ struct handle { - CURL *c; int readonly; + struct curl_handle *ch; +}; + +/* The libcurl handle and some associated fields and buffers. */ +struct curl_handle { + /* The underlying curl handle. */ + CURL *c; + + /* These fields are used/initialized when we create the handle. */ bool accept_range; int64_t exportsize; + char errbuf[CURL_ERROR_SIZE]; + + /* Before doing a read or write operation, set these to point to the + * buffer where you want the data to be stored / come from. Note + * the confusing terminology from libcurl: write_* is used when + * reading, read_* is used when writing. + */ char *write_buf; uint32_t write_count; const char *read_buf; uint32_t read_count; + + /* Used by scripts.c */ struct curl_slist *headers_copy; }; /* scripts.c */ -extern int do_scripts (struct handle *h); +extern int do_scripts (struct curl_handle *ch); extern void scripts_unload (void); #endif /* NBDKIT_CURLDEFS_H */ diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index b4a28155d..d90438e30 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2014-2020 Red Hat Inc. + * Copyright (C) 2014-2023 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -427,10 +427,10 @@ curl_config_complete (void) "user-agent=<USER-AGENT> Send user-agent header for HTTP/HTTPS." /* Translate CURLcode to nbdkit_error. */ -#define display_curl_error(h, r, fs, ...) \ +#define display_curl_error(ch, r, fs, ...) \ do { \ nbdkit_error ((fs ": %s: %s"), ## __VA_ARGS__, \ - curl_easy_strerror ((r)), (h)->errbuf); \ + curl_easy_strerror ((r)), (ch)->errbuf); \ } while (0) static int debug_cb (CURL *handle, curl_infotype type, @@ -458,8 +458,15 @@ curl_open (int readonly) } h->readonly = readonly; - h->c = curl_easy_init (); - if (h->c == NULL) { + h->ch = calloc (1, sizeof *h->ch); + if (h->ch == NULL) { + nbdkit_error ("calloc: %m"); + free (h); + return NULL; + } + + h->ch->c = curl_easy_init (); + if (h->ch->c == NULL) { nbdkit_error ("curl_easy_init: failed: %m"); goto err; } @@ -468,29 +475,29 @@ curl_open (int readonly) /* NB: Constants must be explicitly long because the parameter is * varargs. */ - curl_easy_setopt (h->c, CURLOPT_VERBOSE, 1L); - curl_easy_setopt (h->c, CURLOPT_DEBUGFUNCTION, debug_cb); + curl_easy_setopt (h->ch->c, CURLOPT_VERBOSE, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_DEBUGFUNCTION, debug_cb); } - curl_easy_setopt (h->c, CURLOPT_ERRORBUFFER, h->errbuf); + curl_easy_setopt (h->ch->c, CURLOPT_ERRORBUFFER, h->ch->errbuf); r = CURLE_OK; if (unix_socket_path) { #if HAVE_CURLOPT_UNIX_SOCKET_PATH - r = curl_easy_setopt (h->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); + r = curl_easy_setopt (h->ch->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); #else r = CURLE_UNKNOWN_OPTION; #endif } if (r != CURLE_OK) { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); + display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); goto err; } /* Set the URL. */ - r = curl_easy_setopt (h->c, CURLOPT_URL, url); + r = curl_easy_setopt (h->ch->c, CURLOPT_URL, url); if (r != CURLE_OK) { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); + display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); goto err; } @@ -502,80 +509,80 @@ curl_open (int readonly) * For use of CURLOPT_NOSIGNAL see: * https://curl.se/libcurl/c/CURLOPT_NOSIGNAL.html */ - curl_easy_setopt (h->c, CURLOPT_NOSIGNAL, 1L); - curl_easy_setopt (h->c, CURLOPT_AUTOREFERER, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_NOSIGNAL, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_AUTOREFERER, 1L); if (followlocation) - curl_easy_setopt (h->c, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt (h->c, CURLOPT_FAILONERROR, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_FAILONERROR, 1L); /* Options. */ if (cainfo) { if (strlen (cainfo) == 0) - curl_easy_setopt (h->c, CURLOPT_CAINFO, NULL); + curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, NULL); else - curl_easy_setopt (h->c, CURLOPT_CAINFO, cainfo); + curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, cainfo); } if (capath) - curl_easy_setopt (h->c, CURLOPT_CAPATH, capath); + curl_easy_setopt (h->ch->c, CURLOPT_CAPATH, capath); if (cookie) - curl_easy_setopt (h->c, CURLOPT_COOKIE, cookie); + curl_easy_setopt (h->ch->c, CURLOPT_COOKIE, cookie); if (cookiefile) - curl_easy_setopt (h->c, CURLOPT_COOKIEFILE, cookiefile); + curl_easy_setopt (h->ch->c, CURLOPT_COOKIEFILE, cookiefile); if (cookiejar) - curl_easy_setopt (h->c, CURLOPT_COOKIEJAR, cookiejar); + curl_easy_setopt (h->ch->c, CURLOPT_COOKIEJAR, cookiejar); if (headers) - curl_easy_setopt (h->c, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt (h->ch->c, CURLOPT_HTTPHEADER, headers); if (password) - curl_easy_setopt (h->c, CURLOPT_PASSWORD, password); + curl_easy_setopt (h->ch->c, CURLOPT_PASSWORD, password); #ifndef HAVE_CURLOPT_PROTOCOLS_STR if (protocols != CURLPROTO_ALL) { - curl_easy_setopt (h->c, CURLOPT_PROTOCOLS, protocols); - curl_easy_setopt (h->c, CURLOPT_REDIR_PROTOCOLS, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS, protocols); } #else /* HAVE_CURLOPT_PROTOCOLS_STR (new in 7.85.0) */ if (protocols) { - curl_easy_setopt (h->c, CURLOPT_PROTOCOLS_STR, protocols); - curl_easy_setopt (h->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS_STR, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); } #endif /* HAVE_CURLOPT_PROTOCOLS_STR */ if (proxy) - curl_easy_setopt (h->c, CURLOPT_PROXY, proxy); + curl_easy_setopt (h->ch->c, CURLOPT_PROXY, proxy); if (proxy_password) - curl_easy_setopt (h->c, CURLOPT_PROXYPASSWORD, proxy_password); + curl_easy_setopt (h->ch->c, CURLOPT_PROXYPASSWORD, proxy_password); if (proxy_user) - curl_easy_setopt (h->c, CURLOPT_PROXYUSERNAME, proxy_user); + curl_easy_setopt (h->ch->c, CURLOPT_PROXYUSERNAME, proxy_user); if (!sslverify) { - curl_easy_setopt (h->c, CURLOPT_SSL_VERIFYPEER, 0L); - curl_easy_setopt (h->c, CURLOPT_SSL_VERIFYHOST, 0L); + curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYHOST, 0L); } if (ssl_version) { if (strcmp (ssl_version, "tlsv1") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); else if (strcmp (ssl_version, "sslv2") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); else if (strcmp (ssl_version, "sslv3") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); else if (strcmp (ssl_version, "tlsv1.0") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); else if (strcmp (ssl_version, "tlsv1.1") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); else if (strcmp (ssl_version, "tlsv1.2") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); else if (strcmp (ssl_version, "tlsv1.3") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); else { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", + display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", ssl_version); goto err; } } if (ssl_cipher_list) - curl_easy_setopt (h->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); + curl_easy_setopt (h->ch->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); if (tls13_ciphers) { #if (LIBCURL_VERSION_MAJOR > 7) || \ (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 61) - curl_easy_setopt (h->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); + curl_easy_setopt (h->ch->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); #else /* This is not available before curl-7.61 */ nbdkit_error ("tls13-ciphers is not supported in this build of " @@ -584,18 +591,18 @@ curl_open (int readonly) #endif } if (tcp_keepalive) - curl_easy_setopt (h->c, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_TCP_KEEPALIVE, 1L); if (!tcp_nodelay) - curl_easy_setopt (h->c, CURLOPT_TCP_NODELAY, 0L); + curl_easy_setopt (h->ch->c, CURLOPT_TCP_NODELAY, 0L); if (timeout > 0) /* NB: The cast is required here because the parameter is varargs * treated as long, and not type safe. */ - curl_easy_setopt (h->c, CURLOPT_TIMEOUT, (long) timeout); + curl_easy_setopt (h->ch->c, CURLOPT_TIMEOUT, (long) timeout); if (user) - curl_easy_setopt (h->c, CURLOPT_USERNAME, user); + curl_easy_setopt (h->ch->c, CURLOPT_USERNAME, user); if (user_agent) - curl_easy_setopt (h->c, CURLOPT_USERAGENT, user_agent); + curl_easy_setopt (h->ch->c, CURLOPT_USERAGENT, user_agent); /* Get the file size and also whether the remote HTTP server * supports byte ranges. @@ -603,23 +610,24 @@ curl_open (int readonly) * We must run the scripts if necessary and set headers in the * handle. */ - if (do_scripts (h) == -1) goto err; - h->accept_range = false; - curl_easy_setopt (h->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ - curl_easy_setopt (h->c, CURLOPT_HEADERFUNCTION, header_cb); - curl_easy_setopt (h->c, CURLOPT_HEADERDATA, h); - r = curl_easy_perform (h->c); + if (do_scripts (h->ch) == -1) goto err; + h->ch->accept_range = false; + curl_easy_setopt (h->ch->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ + curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, h->ch); + r = curl_easy_perform (h->ch->c); if (r != CURLE_OK) { - display_curl_error (h, r, + display_curl_error (h->ch, r, "problem doing HEAD request to fetch size of URL [%s]", url); goto err; } #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); + r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); if (r != CURLE_OK) { - display_curl_error (h, r, "could not get length of remote file [%s]", url); + display_curl_error (h->ch, r, + "could not get length of remote file [%s]", url); goto err; } @@ -629,11 +637,12 @@ curl_open (int readonly) goto err; } - h->exportsize = o; + h->ch->exportsize = o; #else - r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); + r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); if (r != CURLE_OK) { - display_curl_error (h, r, "could not get length of remote file [%s]", url); + display_curl_error (h->ch, r, + "could not get length of remote file [%s]", url); goto err; } @@ -643,13 +652,13 @@ curl_open (int readonly) goto err; } - h->exportsize = d; + h->ch->exportsize = d; #endif - nbdkit_debug ("content length: %" PRIi64, h->exportsize); + nbdkit_debug ("content length: %" PRIi64, h->ch->exportsize); if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { - if (!h->accept_range) { + if (!h->ch->accept_range) { nbdkit_error ("server does not support 'range' (byte range) requests"); goto err; } @@ -658,20 +667,21 @@ curl_open (int readonly) } /* Get set up for reading and writing. */ - curl_easy_setopt (h->c, CURLOPT_HEADERFUNCTION, NULL); - curl_easy_setopt (h->c, CURLOPT_HEADERDATA, NULL); - curl_easy_setopt (h->c, CURLOPT_WRITEFUNCTION, write_cb); - curl_easy_setopt (h->c, CURLOPT_WRITEDATA, h); + curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, NULL); + curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, NULL); + curl_easy_setopt (h->ch->c, CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt (h->ch->c, CURLOPT_WRITEDATA, h->ch); if (!readonly) { - curl_easy_setopt (h->c, CURLOPT_READFUNCTION, read_cb); - curl_easy_setopt (h->c, CURLOPT_READDATA, h); + curl_easy_setopt (h->ch->c, CURLOPT_READFUNCTION, read_cb); + curl_easy_setopt (h->ch->c, CURLOPT_READDATA, h->ch); } return h; err: - if (h->c) - curl_easy_cleanup (h->c); + if (h->ch->c) + curl_easy_cleanup (h->ch->c); + free (h->ch); free (h); return NULL; } @@ -724,7 +734,7 @@ debug_cb (CURL *handle, curl_infotype type, static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) { - struct handle *h = opaque; + struct curl_handle *ch = opaque; size_t realsize = size * nmemb; const char *header = ptr; const char *end = header + realsize; @@ -747,7 +757,7 @@ header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) p++; if (p == end || !*p) - h->accept_range = true; + ch->accept_range = true; } } @@ -760,9 +770,10 @@ curl_close (void *handle) { struct handle *h = handle; - curl_easy_cleanup (h->c); - if (h->headers_copy) - curl_slist_free_all (h->headers_copy); + curl_easy_cleanup (h->ch->c); + if (h->ch->headers_copy) + curl_slist_free_all (h->ch->headers_copy); + free (h->ch); free (h); } @@ -774,7 +785,7 @@ curl_get_size (void *handle) { struct handle *h = handle; - return h->exportsize; + return h->ch->exportsize; } /* Multi-conn is safe for read-only connections, but HTTP does not @@ -806,25 +817,25 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) char range[128]; /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h) == -1) return -1; + if (do_scripts (h->ch) == -1) return -1; /* Tell the write_cb where we want the data to be written. write_cb * will update this if the data comes in multiple sections. */ - h->write_buf = buf; - h->write_count = count; + h->ch->write_buf = buf; + h->ch->write_count = count; - curl_easy_setopt (h->c, CURLOPT_HTTPGET, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_HTTPGET, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->c, CURLOPT_RANGE, range); + curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->c); + r = curl_easy_perform (h->ch->c); if (r != CURLE_OK) { - display_curl_error (h, r, "pread: curl_easy_perform"); + display_curl_error (h->ch, r, "pread: curl_easy_perform"); return -1; } @@ -833,7 +844,7 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->write_count == 0); + assert (h->ch->write_count == 0); return 0; } @@ -841,22 +852,22 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) { - struct handle *h = opaque; + struct curl_handle *ch = opaque; size_t orig_realsize = size * nmemb; size_t realsize = orig_realsize; - assert (h->write_buf); + assert (ch->write_buf); /* Don't read more than the requested amount of data, even if the * server or libcurl sends more. */ - if (realsize > h->write_count) - realsize = h->write_count; + if (realsize > ch->write_count) + realsize = ch->write_count; - memcpy (h->write_buf, ptr, realsize); + memcpy (ch->write_buf, ptr, realsize); - h->write_count -= realsize; - h->write_buf += realsize; + ch->write_count -= realsize; + ch->write_buf += realsize; return orig_realsize; } @@ -870,25 +881,25 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) char range[128]; /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h) == -1) return -1; + if (do_scripts (h->ch) == -1) return -1; /* Tell the read_cb where we want the data to be read from. read_cb * will update this if the data comes in multiple sections. */ - h->read_buf = buf; - h->read_count = count; + h->ch->read_buf = buf; + h->ch->read_count = count; - curl_easy_setopt (h->c, CURLOPT_UPLOAD, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_UPLOAD, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->c, CURLOPT_RANGE, range); + curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->c); + r = curl_easy_perform (h->ch->c); if (r != CURLE_OK) { - display_curl_error (h, r, "pwrite: curl_easy_perform"); + display_curl_error (h->ch, r, "pwrite: curl_easy_perform"); return -1; } @@ -897,7 +908,7 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->read_count == 0); + assert (h->ch->read_count == 0); return 0; } @@ -905,17 +916,17 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) { - struct handle *h = opaque; + struct curl_handle *ch = opaque; size_t realsize = size * nmemb; - assert (h->read_buf); - if (realsize > h->read_count) - realsize = h->read_count; + assert (ch->read_buf); + if (realsize > ch->read_count) + realsize = ch->read_count; - memcpy (ptr, h->read_buf, realsize); + memcpy (ptr, ch->read_buf, realsize); - h->read_count -= realsize; - h->read_buf += realsize; + ch->read_count -= realsize; + ch->read_buf += realsize; return realsize; } diff --git a/plugins/curl/scripts.c b/plugins/curl/scripts.c index 892f7da28..9b674fc83 100644 --- a/plugins/curl/scripts.c +++ b/plugins/curl/scripts.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2014-2020 Red Hat Inc. + * Copyright (C) 2014-2023 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -80,8 +80,8 @@ scripts_unload (void) free (cookies_from_script); } -static int run_header_script (struct handle *); -static int run_cookie_script (struct handle *); +static int run_header_script (struct curl_handle *); +static int run_cookie_script (struct curl_handle *); static void error_from_tmpfile (const char *what, const char *tmpfile); /* This is called from any thread just before we make a curl request. @@ -90,7 +90,7 @@ static void error_from_tmpfile (const char *what, const char *tmpfile); * we can be assured of exclusive access to handle here. */ int -do_scripts (struct handle *h) +do_scripts (struct curl_handle *ch) { time_t now; struct curl_slist *p; @@ -108,7 +108,7 @@ do_scripts (struct handle *h) time (&now); if (!header_script_has_run || (header_script_renew > 0 && now - header_last >= header_script_renew)) { - if (run_header_script (h) == -1) + if (run_header_script (ch) == -1) return -1; header_last = now; header_script_has_run = true; @@ -120,7 +120,7 @@ do_scripts (struct handle *h) time (&now); if (!cookie_script_has_run || (cookie_script_renew > 0 && now - cookie_last >= cookie_script_renew)) { - if (run_cookie_script (h) == -1) + if (run_cookie_script (ch) == -1) return -1; cookie_last = now; cookie_script_has_run = true; @@ -133,28 +133,28 @@ do_scripts (struct handle *h) * because unfortunately curl doesn't take a copy. Since we don't * know which other threads might be using it, we must make a copy * of the global list (headers_from_script) per handle - * (h->headers_copy). For CURLOPT_COOKIE, curl internally takes a + * (ch->headers_copy). For CURLOPT_COOKIE, curl internally takes a * copy so we don't need to do this. */ - if (h->headers_copy) { - curl_easy_setopt (h->c, CURLOPT_HTTPHEADER, NULL); - curl_slist_free_all (h->headers_copy); - h->headers_copy = NULL; + if (ch->headers_copy) { + curl_easy_setopt (ch->c, CURLOPT_HTTPHEADER, NULL); + curl_slist_free_all (ch->headers_copy); + ch->headers_copy = NULL; } for (p = headers_from_script; p != NULL; p = p->next) { if (curl_debug_scripts) nbdkit_debug ("header-script: setting header %s", p->data); - h->headers_copy = curl_slist_append (h->headers_copy, p->data); - if (h->headers_copy == NULL) { + ch->headers_copy = curl_slist_append (ch->headers_copy, p->data); + if (ch->headers_copy == NULL) { nbdkit_error ("curl_slist_append: %m"); return -1; } } - curl_easy_setopt (h->c, CURLOPT_HTTPHEADER, h->headers_copy); + curl_easy_setopt (ch->c, CURLOPT_HTTPHEADER, ch->headers_copy); if (curl_debug_scripts && cookies_from_script) nbdkit_debug ("cookie-script: setting cookie %s", cookies_from_script); - curl_easy_setopt (h->c, CURLOPT_COOKIE, cookies_from_script); + curl_easy_setopt (ch->c, CURLOPT_COOKIE, cookies_from_script); return 0; } @@ -163,7 +163,7 @@ do_scripts (struct handle *h) * header-script. */ static int -run_header_script (struct handle *h) +run_header_script (struct curl_handle *ch) { int fd; char tmpfile[] = "/tmp/errorsXXXXXX"; @@ -243,7 +243,7 @@ run_header_script (struct handle *h) * cookie-script. */ static int -run_cookie_script (struct handle *h) +run_cookie_script (struct curl_handle *ch) { int fd; char tmpfile[] = "/tmp/errorsXXXXXX"; -- 2.39.0
Richard W.M. Jones
2023-Feb-07 16:30 UTC
[Libguestfs] [PATCH nbdkit v2 2/4] curl: Introduce concept of getting/putting handles from a common pool
Introduce the concept of a pool of curl handles from which the main code has to take a handle (get_handle()), and later return it to the pool (put_handle()). When a handle is taken from the pool it can be used exclusively by the main code until it is returned. The "pool" only has at most one handle, so this is actually a pessimization from the situation before because all NBD connections share a single curl handle. In a future commit we will add support for multiple handles in the pool. A note about the change to tests/test-retry-request-mirror.c: This test depended implicitly on a new NBD connection opening a new libcurl handle (this no longer happens), and when opening a new libcurl handle that causes a HEAD request to be made. We have to adjust the test to remove this assumption. --- plugins/curl/Makefile.am | 5 +- plugins/curl/curldefs.h | 5 + plugins/curl/curl.c | 434 +++----------------------- plugins/curl/pool.c | 486 ++++++++++++++++++++++++++++++ plugins/curl/scripts.c | 4 +- tests/test-retry-request-mirror.c | 9 - 6 files changed, 540 insertions(+), 403 deletions(-) diff --git a/plugins/curl/Makefile.am b/plugins/curl/Makefile.am index f08923e77..d6b97a821 100644 --- a/plugins/curl/Makefile.am +++ b/plugins/curl/Makefile.am @@ -1,5 +1,5 @@ # nbdkit -# Copyright (C) 2014-2022 Red Hat Inc. +# Copyright (C) 2014-2023 Red Hat Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are @@ -39,8 +39,9 @@ plugin_LTLIBRARIES = nbdkit-curl-plugin.la nbdkit_curl_plugin_la_SOURCES = \ curldefs.h \ - scripts.c \ curl.c \ + pool.c \ + scripts.c \ $(top_srcdir)/include/nbdkit-plugin.h \ $(NULL) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 05b710ab2..533c28c52 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -112,6 +112,11 @@ struct curl_handle { struct curl_slist *headers_copy; }; +/* pool.c */ +extern struct curl_handle *get_handle (void); +extern void put_handle (struct curl_handle *ch); +extern void free_all_handles (void); + /* scripts.c */ extern int do_scripts (struct curl_handle *ch); extern void scripts_unload (void); diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index d90438e30..105de29c6 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -48,8 +48,6 @@ #include <nbdkit-plugin.h> -#include "ascii-ctype.h" -#include "ascii-string.h" #include "cleanup.h" #include "curldefs.h" @@ -112,6 +110,7 @@ curl_unload (void) free (password); free (proxy_password); scripts_unload (); + free_all_handles (); curl_global_cleanup (); } @@ -433,23 +432,11 @@ curl_config_complete (void) curl_easy_strerror ((r)), (ch)->errbuf); \ } while (0) -static int debug_cb (CURL *handle, curl_infotype type, - const char *data, size_t size, void *); -static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); -static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); -static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); - /* Create the per-connection handle. */ static void * curl_open (int readonly) { struct handle *h; - CURLcode r; -#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - curl_off_t o; -#else - double d; -#endif h = calloc (1, sizeof *h); if (h == NULL) { @@ -458,310 +445,7 @@ curl_open (int readonly) } h->readonly = readonly; - h->ch = calloc (1, sizeof *h->ch); - if (h->ch == NULL) { - nbdkit_error ("calloc: %m"); - free (h); - return NULL; - } - - h->ch->c = curl_easy_init (); - if (h->ch->c == NULL) { - nbdkit_error ("curl_easy_init: failed: %m"); - goto err; - } - - if (curl_debug_verbose) { - /* NB: Constants must be explicitly long because the parameter is - * varargs. - */ - curl_easy_setopt (h->ch->c, CURLOPT_VERBOSE, 1L); - curl_easy_setopt (h->ch->c, CURLOPT_DEBUGFUNCTION, debug_cb); - } - - curl_easy_setopt (h->ch->c, CURLOPT_ERRORBUFFER, h->ch->errbuf); - - r = CURLE_OK; - if (unix_socket_path) { -#if HAVE_CURLOPT_UNIX_SOCKET_PATH - r = curl_easy_setopt (h->ch->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); -#else - r = CURLE_UNKNOWN_OPTION; -#endif - } - if (r != CURLE_OK) { - display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); - goto err; - } - - /* Set the URL. */ - r = curl_easy_setopt (h->ch->c, CURLOPT_URL, url); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); - goto err; - } - - /* Various options we always set. - * - * NB: Both here and below constants must be explicitly long because - * the parameter is varargs. - * - * For use of CURLOPT_NOSIGNAL see: - * https://curl.se/libcurl/c/CURLOPT_NOSIGNAL.html - */ - curl_easy_setopt (h->ch->c, CURLOPT_NOSIGNAL, 1L); - curl_easy_setopt (h->ch->c, CURLOPT_AUTOREFERER, 1L); - if (followlocation) - curl_easy_setopt (h->ch->c, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt (h->ch->c, CURLOPT_FAILONERROR, 1L); - - /* Options. */ - if (cainfo) { - if (strlen (cainfo) == 0) - curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, NULL); - else - curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, cainfo); - } - if (capath) - curl_easy_setopt (h->ch->c, CURLOPT_CAPATH, capath); - if (cookie) - curl_easy_setopt (h->ch->c, CURLOPT_COOKIE, cookie); - if (cookiefile) - curl_easy_setopt (h->ch->c, CURLOPT_COOKIEFILE, cookiefile); - if (cookiejar) - curl_easy_setopt (h->ch->c, CURLOPT_COOKIEJAR, cookiejar); - if (headers) - curl_easy_setopt (h->ch->c, CURLOPT_HTTPHEADER, headers); - if (password) - curl_easy_setopt (h->ch->c, CURLOPT_PASSWORD, password); -#ifndef HAVE_CURLOPT_PROTOCOLS_STR - if (protocols != CURLPROTO_ALL) { - curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS, protocols); - curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS, protocols); - } -#else /* HAVE_CURLOPT_PROTOCOLS_STR (new in 7.85.0) */ - if (protocols) { - curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS_STR, protocols); - curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); - } -#endif /* HAVE_CURLOPT_PROTOCOLS_STR */ - if (proxy) - curl_easy_setopt (h->ch->c, CURLOPT_PROXY, proxy); - if (proxy_password) - curl_easy_setopt (h->ch->c, CURLOPT_PROXYPASSWORD, proxy_password); - if (proxy_user) - curl_easy_setopt (h->ch->c, CURLOPT_PROXYUSERNAME, proxy_user); - if (!sslverify) { - curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYPEER, 0L); - curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYHOST, 0L); - } - if (ssl_version) { - if (strcmp (ssl_version, "tlsv1") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); - else if (strcmp (ssl_version, "sslv2") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); - else if (strcmp (ssl_version, "sslv3") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); - else if (strcmp (ssl_version, "tlsv1.0") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); - else if (strcmp (ssl_version, "tlsv1.1") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); - else if (strcmp (ssl_version, "tlsv1.2") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); - else if (strcmp (ssl_version, "tlsv1.3") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); - else { - display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", - ssl_version); - goto err; - } - - } - if (ssl_cipher_list) - curl_easy_setopt (h->ch->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); - if (tls13_ciphers) { -#if (LIBCURL_VERSION_MAJOR > 7) || \ - (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 61) - curl_easy_setopt (h->ch->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); -#else - /* This is not available before curl-7.61 */ - nbdkit_error ("tls13-ciphers is not supported in this build of " - "nbdkit-curl-plugin"); - goto err; -#endif - } - if (tcp_keepalive) - curl_easy_setopt (h->ch->c, CURLOPT_TCP_KEEPALIVE, 1L); - if (!tcp_nodelay) - curl_easy_setopt (h->ch->c, CURLOPT_TCP_NODELAY, 0L); - if (timeout > 0) - /* NB: The cast is required here because the parameter is varargs - * treated as long, and not type safe. - */ - curl_easy_setopt (h->ch->c, CURLOPT_TIMEOUT, (long) timeout); - if (user) - curl_easy_setopt (h->ch->c, CURLOPT_USERNAME, user); - if (user_agent) - curl_easy_setopt (h->ch->c, CURLOPT_USERAGENT, user_agent); - - /* Get the file size and also whether the remote HTTP server - * supports byte ranges. - * - * We must run the scripts if necessary and set headers in the - * handle. - */ - if (do_scripts (h->ch) == -1) goto err; - h->ch->accept_range = false; - curl_easy_setopt (h->ch->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ - curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, header_cb); - curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, h->ch); - r = curl_easy_perform (h->ch->c); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, - "problem doing HEAD request to fetch size of URL [%s]", - url); - goto err; - } - -#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, - "could not get length of remote file [%s]", url); - goto err; - } - - if (o == -1) { - nbdkit_error ("could not get length of remote file [%s], " - "is the URL correct?", url); - goto err; - } - - h->ch->exportsize = o; -#else - r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, - "could not get length of remote file [%s]", url); - goto err; - } - - if (d == -1) { - nbdkit_error ("could not get length of remote file [%s], " - "is the URL correct?", url); - goto err; - } - - h->ch->exportsize = d; -#endif - nbdkit_debug ("content length: %" PRIi64, h->ch->exportsize); - - if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || - ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { - if (!h->ch->accept_range) { - nbdkit_error ("server does not support 'range' (byte range) requests"); - goto err; - } - - nbdkit_debug ("accept range supported (for HTTP/HTTPS)"); - } - - /* Get set up for reading and writing. */ - curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, NULL); - curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, NULL); - curl_easy_setopt (h->ch->c, CURLOPT_WRITEFUNCTION, write_cb); - curl_easy_setopt (h->ch->c, CURLOPT_WRITEDATA, h->ch); - if (!readonly) { - curl_easy_setopt (h->ch->c, CURLOPT_READFUNCTION, read_cb); - curl_easy_setopt (h->ch->c, CURLOPT_READDATA, h->ch); - } - return h; - - err: - if (h->ch->c) - curl_easy_cleanup (h->ch->c); - free (h->ch); - free (h); - return NULL; -} - -/* When using CURLOPT_VERBOSE, this callback is used to redirect - * messages to nbdkit_debug (instead of stderr). - */ -static int -debug_cb (CURL *handle, curl_infotype type, - const char *data, size_t size, void *opaque) -{ - size_t origsize = size; - CLEANUP_FREE char *str; - - /* The data parameter passed is NOT \0-terminated, but also it may - * have \n or \r\n line endings. The only sane way to deal with - * this is to copy the string. (The data strings may also be - * multi-line, but we don't deal with that here). - */ - str = malloc (size + 1); - if (str == NULL) - goto out; - memcpy (str, data, size); - str[size] = '\0'; - - while (size > 0 && (str[size-1] == '\n' || str[size-1] == '\r')) { - str[size-1] = '\0'; - size--; - } - - switch (type) { - case CURLINFO_TEXT: - nbdkit_debug ("%s", str); - break; - case CURLINFO_HEADER_IN: - nbdkit_debug ("S: %s", str); - break; - case CURLINFO_HEADER_OUT: - nbdkit_debug ("C: %s", str); - break; - default: - /* Assume everything else is binary data that we cannot print. */ - nbdkit_debug ("<data with size=%zu>", origsize); - } - - out: - return 0; -} - -static size_t -header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t realsize = size * nmemb; - const char *header = ptr; - const char *end = header + realsize; - const char *accept_ranges = "accept-ranges:"; - const char *bytes = "bytes"; - - if (realsize >= strlen (accept_ranges) && - ascii_strncasecmp (header, accept_ranges, strlen (accept_ranges)) == 0) { - const char *p = strchr (header, ':') + 1; - - /* Skip whitespace between the header name and value. */ - while (p < end && *p && ascii_isspace (*p)) - p++; - - if (end - p >= strlen (bytes) - && strncmp (p, bytes, strlen (bytes)) == 0) { - /* Check that there is nothing but whitespace after the value. */ - p += strlen (bytes); - while (p < end && *p && ascii_isspace (*p)) - p++; - - if (p == end || !*p) - ch->accept_range = true; - } - } - - return realsize; } /* Free up the per-connection handle. */ @@ -770,22 +454,35 @@ curl_close (void *handle) { struct handle *h = handle; - curl_easy_cleanup (h->ch->c); - if (h->ch->headers_copy) - curl_slist_free_all (h->ch->headers_copy); - free (h->ch); free (h); } #define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS +/* Calls get_handle() ... put_handle() to get a handle for the length + * of the current scope. + */ +#define GET_HANDLE_FOR_CURRENT_SCOPE(ch) \ + CLEANUP_PUT_HANDLE struct curl_handle *ch = get_handle (); +#define CLEANUP_PUT_HANDLE __attribute__((cleanup (cleanup_put_handle))) +static void +cleanup_put_handle (void *chp) +{ + struct curl_handle *ch = * (struct curl_handle **) chp; + + if (ch != NULL) + put_handle (ch); +} + /* Get the file size. */ static int64_t curl_get_size (void *handle) { - struct handle *h = handle; + GET_HANDLE_FOR_CURRENT_SCOPE (ch); + if (ch == NULL) + return -1; - return h->ch->exportsize; + return ch->exportsize; } /* Multi-conn is safe for read-only connections, but HTTP does not @@ -800,42 +497,37 @@ curl_can_multi_conn (void *handle) return !! h->readonly; } -/* NB: The terminology used by libcurl is confusing! - * - * WRITEFUNCTION / write_cb is used when reading from the remote server - * READFUNCTION / read_cb is used when writing to the remote server. - * - * We use the same terminology as libcurl here. - */ - /* Read data from the remote server. */ static int curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) { - struct handle *h = handle; CURLcode r; char range[128]; + GET_HANDLE_FOR_CURRENT_SCOPE (ch); + if (ch == NULL) + return -1; + /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h->ch) == -1) return -1; + if (do_scripts (ch) == -1) return -1; /* Tell the write_cb where we want the data to be written. write_cb * will update this if the data comes in multiple sections. */ - h->ch->write_buf = buf; - h->ch->write_count = count; + ch->write_buf = buf; + ch->write_count = count; - curl_easy_setopt (h->ch->c, CURLOPT_HTTPGET, 1L); + curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); + curl_easy_setopt (ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->ch->c); + r = curl_easy_perform (ch->c); if (r != CURLE_OK) { - display_curl_error (h->ch, r, "pread: curl_easy_perform"); + display_curl_error (ch, r, "pread: curl_easy_perform"); return -1; } @@ -844,62 +536,42 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->ch->write_count == 0); + assert (ch->write_count == 0); return 0; } -static size_t -write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t orig_realsize = size * nmemb; - size_t realsize = orig_realsize; - - assert (ch->write_buf); - - /* Don't read more than the requested amount of data, even if the - * server or libcurl sends more. - */ - if (realsize > ch->write_count) - realsize = ch->write_count; - - memcpy (ch->write_buf, ptr, realsize); - - ch->write_count -= realsize; - ch->write_buf += realsize; - - return orig_realsize; -} - /* Write data to the remote server. */ static int curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) { - struct handle *h = handle; CURLcode r; char range[128]; + GET_HANDLE_FOR_CURRENT_SCOPE (ch); + if (ch == NULL) + return -1; + /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h->ch) == -1) return -1; + if (do_scripts (ch) == -1) return -1; /* Tell the read_cb where we want the data to be read from. read_cb * will update this if the data comes in multiple sections. */ - h->ch->read_buf = buf; - h->ch->read_count = count; + ch->read_buf = buf; + ch->read_count = count; - curl_easy_setopt (h->ch->c, CURLOPT_UPLOAD, 1L); + curl_easy_setopt (ch->c, CURLOPT_UPLOAD, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); + curl_easy_setopt (ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->ch->c); + r = curl_easy_perform (ch->c); if (r != CURLE_OK) { - display_curl_error (h->ch, r, "pwrite: curl_easy_perform"); + display_curl_error (ch, r, "pwrite: curl_easy_perform"); return -1; } @@ -908,29 +580,11 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->ch->read_count == 0); + assert (ch->read_count == 0); return 0; } -static size_t -read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t realsize = size * nmemb; - - assert (ch->read_buf); - if (realsize > ch->read_count) - realsize = ch->read_count; - - memcpy (ptr, ch->read_buf, realsize); - - ch->read_count -= realsize; - ch->read_buf += realsize; - - return realsize; -} - static struct nbdkit_plugin plugin = { .name = "curl", .version = PACKAGE_VERSION, diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c new file mode 100644 index 000000000..00a7d381b --- /dev/null +++ b/plugins/curl/pool.c @@ -0,0 +1,486 @@ +/* nbdkit + * Copyright (C) 2014-2023 Red Hat Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Curl handle pool. + * + * To get a libcurl handle, call get_handle(). When you hold the + * handle, it is yours exclusively to use. After you have finished + * with the handle, put it back into the pool by calling put_handle(). + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <assert.h> +#include <pthread.h> + +#include <curl/curl.h> + +#include <nbdkit-plugin.h> + +#include "ascii-ctype.h" +#include "ascii-string.h" +#include "cleanup.h" + +#include "curldefs.h" + +/* Translate CURLcode to nbdkit_error. */ +#define display_curl_error(ch, r, fs, ...) \ + do { \ + nbdkit_error ((fs ": %s: %s"), ## __VA_ARGS__, \ + curl_easy_strerror ((r)), (ch)->errbuf); \ + } while (0) + +static struct curl_handle *allocate_handle (void); +static int debug_cb (CURL *handle, curl_infotype type, + const char *data, size_t size, void *); +static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); +static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); +static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); + +/* In the current implementation there is only one handle. This lock + * prevents it from being used multiple times. + */ +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + +/* The single curl handle. NULL means not yet allocated. */ +static struct curl_handle *the_ch; + +/* Close and free all handles in the pool. */ +void +free_all_handles (void) +{ + if (the_ch) { + curl_easy_cleanup (the_ch->c); + if (the_ch->headers_copy) + curl_slist_free_all (the_ch->headers_copy); + free (the_ch); + } +} + +/* Get a handle from the pool. + * + * It is owned exclusively by the caller until they call put_handle. + */ +struct curl_handle * +get_handle (void) +{ + int r; + + r = pthread_mutex_lock (&lock); + assert (r == 0); + if (!the_ch) { + the_ch = allocate_handle (); + if (!the_ch) { + pthread_mutex_unlock (&lock); + return NULL; + } + } + return the_ch; +} + +/* Return the handle to the pool. */ +void +put_handle (struct curl_handle *ch) +{ + pthread_mutex_unlock (&lock); +} + +/* Allocate and initialize a new libcurl handle. */ +static struct curl_handle * +allocate_handle (void) +{ + struct curl_handle *ch; + CURLcode r; +#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + curl_off_t o; +#else + double d; +#endif + + ch = calloc (1, sizeof *ch); + if (ch == NULL) { + nbdkit_error ("calloc: %m"); + free (ch); + return NULL; + } + + ch->c = curl_easy_init (); + if (ch->c == NULL) { + nbdkit_error ("curl_easy_init: failed: %m"); + goto err; + } + + if (curl_debug_verbose) { + /* NB: Constants must be explicitly long because the parameter is + * varargs. + */ + curl_easy_setopt (ch->c, CURLOPT_VERBOSE, 1L); + curl_easy_setopt (ch->c, CURLOPT_DEBUGFUNCTION, debug_cb); + } + + curl_easy_setopt (ch->c, CURLOPT_ERRORBUFFER, ch->errbuf); + + r = CURLE_OK; + if (unix_socket_path) { +#if HAVE_CURLOPT_UNIX_SOCKET_PATH + r = curl_easy_setopt (ch->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); +#else + r = CURLE_UNKNOWN_OPTION; +#endif + } + if (r != CURLE_OK) { + display_curl_error (ch, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); + goto err; + } + + /* Set the URL. */ + r = curl_easy_setopt (ch->c, CURLOPT_URL, url); + if (r != CURLE_OK) { + display_curl_error (ch, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); + goto err; + } + + /* Various options we always set. + * + * NB: Both here and below constants must be explicitly long because + * the parameter is varargs. + * + * For use of CURLOPT_NOSIGNAL see: + * https://curl.se/libcurl/c/CURLOPT_NOSIGNAL.html + */ + curl_easy_setopt (ch->c, CURLOPT_NOSIGNAL, 1L); + curl_easy_setopt (ch->c, CURLOPT_AUTOREFERER, 1L); + if (followlocation) + curl_easy_setopt (ch->c, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt (ch->c, CURLOPT_FAILONERROR, 1L); + + /* Options. */ + if (cainfo) { + if (strlen (cainfo) == 0) + curl_easy_setopt (ch->c, CURLOPT_CAINFO, NULL); + else + curl_easy_setopt (ch->c, CURLOPT_CAINFO, cainfo); + } + if (capath) + curl_easy_setopt (ch->c, CURLOPT_CAPATH, capath); + if (cookie) + curl_easy_setopt (ch->c, CURLOPT_COOKIE, cookie); + if (cookiefile) + curl_easy_setopt (ch->c, CURLOPT_COOKIEFILE, cookiefile); + if (cookiejar) + curl_easy_setopt (ch->c, CURLOPT_COOKIEJAR, cookiejar); + if (headers) + curl_easy_setopt (ch->c, CURLOPT_HTTPHEADER, headers); + if (password) + curl_easy_setopt (ch->c, CURLOPT_PASSWORD, password); +#ifndef HAVE_CURLOPT_PROTOCOLS_STR + if (protocols != CURLPROTO_ALL) { + curl_easy_setopt (ch->c, CURLOPT_PROTOCOLS, protocols); + curl_easy_setopt (ch->c, CURLOPT_REDIR_PROTOCOLS, protocols); + } +#else /* HAVE_CURLOPT_PROTOCOLS_STR (new in 7.85.0) */ + if (protocols) { + curl_easy_setopt (ch->c, CURLOPT_PROTOCOLS_STR, protocols); + curl_easy_setopt (ch->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); + } +#endif /* HAVE_CURLOPT_PROTOCOLS_STR */ + if (proxy) + curl_easy_setopt (ch->c, CURLOPT_PROXY, proxy); + if (proxy_password) + curl_easy_setopt (ch->c, CURLOPT_PROXYPASSWORD, proxy_password); + if (proxy_user) + curl_easy_setopt (ch->c, CURLOPT_PROXYUSERNAME, proxy_user); + if (!sslverify) { + curl_easy_setopt (ch->c, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt (ch->c, CURLOPT_SSL_VERIFYHOST, 0L); + } + if (ssl_version) { + if (strcmp (ssl_version, "tlsv1") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); + else if (strcmp (ssl_version, "sslv2") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); + else if (strcmp (ssl_version, "sslv3") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); + else if (strcmp (ssl_version, "tlsv1.0") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); + else if (strcmp (ssl_version, "tlsv1.1") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); + else if (strcmp (ssl_version, "tlsv1.2") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); + else if (strcmp (ssl_version, "tlsv1.3") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); + else { + display_curl_error (ch, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", + ssl_version); + goto err; + } + + } + if (ssl_cipher_list) + curl_easy_setopt (ch->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); + if (tls13_ciphers) { +#if (LIBCURL_VERSION_MAJOR > 7) || \ + (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 61) + curl_easy_setopt (ch->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); +#else + /* This is not available before curl-7.61 */ + nbdkit_error ("tls13-ciphers is not supported in this build of " + "nbdkit-curl-plugin"); + goto err; +#endif + } + if (tcp_keepalive) + curl_easy_setopt (ch->c, CURLOPT_TCP_KEEPALIVE, 1L); + if (!tcp_nodelay) + curl_easy_setopt (ch->c, CURLOPT_TCP_NODELAY, 0L); + if (timeout > 0) + /* NB: The cast is required here because the parameter is varargs + * treated as long, and not type safe. + */ + curl_easy_setopt (ch->c, CURLOPT_TIMEOUT, (long) timeout); + if (user) + curl_easy_setopt (ch->c, CURLOPT_USERNAME, user); + if (user_agent) + curl_easy_setopt (ch->c, CURLOPT_USERAGENT, user_agent); + + /* Get the file size and also whether the remote HTTP server + * supports byte ranges. + * + * We must run the scripts if necessary and set headers in the + * handle. + */ + if (do_scripts (ch) == -1) goto err; + ch->accept_range = false; + curl_easy_setopt (ch->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); + r = curl_easy_perform (ch->c); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "problem doing HEAD request to fetch size of URL [%s]", + url); + goto err; + } + +#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "could not get length of remote file [%s]", url); + goto err; + } + + if (o == -1) { + nbdkit_error ("could not get length of remote file [%s], " + "is the URL correct?", url); + goto err; + } + + ch->exportsize = o; +#else + r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "could not get length of remote file [%s]", url); + goto err; + } + + if (d == -1) { + nbdkit_error ("could not get length of remote file [%s], " + "is the URL correct?", url); + goto err; + } + + ch->exportsize = d; +#endif + nbdkit_debug ("content length: %" PRIi64, ch->exportsize); + + if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || + ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { + if (!ch->accept_range) { + nbdkit_error ("server does not support 'range' (byte range) requests"); + goto err; + } + + nbdkit_debug ("accept range supported (for HTTP/HTTPS)"); + } + + /* Get set up for reading and writing. */ + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, NULL); + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, NULL); + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); + /* These are only used if !readonly but we always register them. */ + curl_easy_setopt (ch->c, CURLOPT_READFUNCTION, read_cb); + curl_easy_setopt (ch->c, CURLOPT_READDATA, ch); + + return ch; + + err: + if (ch->c) + curl_easy_cleanup (ch->c); + free (ch); + return NULL; +} + +/* When using CURLOPT_VERBOSE, this callback is used to redirect + * messages to nbdkit_debug (instead of stderr). + */ +static int +debug_cb (CURL *handle, curl_infotype type, + const char *data, size_t size, void *opaque) +{ + size_t origsize = size; + CLEANUP_FREE char *str; + + /* The data parameter passed is NOT \0-terminated, but also it may + * have \n or \r\n line endings. The only sane way to deal with + * this is to copy the string. (The data strings may also be + * multi-line, but we don't deal with that here). + */ + str = malloc (size + 1); + if (str == NULL) + goto out; + memcpy (str, data, size); + str[size] = '\0'; + + while (size > 0 && (str[size-1] == '\n' || str[size-1] == '\r')) { + str[size-1] = '\0'; + size--; + } + + switch (type) { + case CURLINFO_TEXT: + nbdkit_debug ("%s", str); + break; + case CURLINFO_HEADER_IN: + nbdkit_debug ("S: %s", str); + break; + case CURLINFO_HEADER_OUT: + nbdkit_debug ("C: %s", str); + break; + default: + /* Assume everything else is binary data that we cannot print. */ + nbdkit_debug ("<data with size=%zu>", origsize); + } + + out: + return 0; +} + +static size_t +header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t realsize = size * nmemb; + const char *header = ptr; + const char *end = header + realsize; + const char *accept_ranges = "accept-ranges:"; + const char *bytes = "bytes"; + + if (realsize >= strlen (accept_ranges) && + ascii_strncasecmp (header, accept_ranges, strlen (accept_ranges)) == 0) { + const char *p = strchr (header, ':') + 1; + + /* Skip whitespace between the header name and value. */ + while (p < end && *p && ascii_isspace (*p)) + p++; + + if (end - p >= strlen (bytes) + && strncmp (p, bytes, strlen (bytes)) == 0) { + /* Check that there is nothing but whitespace after the value. */ + p += strlen (bytes); + while (p < end && *p && ascii_isspace (*p)) + p++; + + if (p == end || !*p) + ch->accept_range = true; + } + } + + return realsize; +} + +/* NB: The terminology used by libcurl is confusing! + * + * WRITEFUNCTION / write_cb is used when reading from the remote server + * READFUNCTION / read_cb is used when writing to the remote server. + * + * We use the same terminology as libcurl here. + */ + +static size_t +write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t orig_realsize = size * nmemb; + size_t realsize = orig_realsize; + + assert (ch->write_buf); + + /* Don't read more than the requested amount of data, even if the + * server or libcurl sends more. + */ + if (realsize > ch->write_count) + realsize = ch->write_count; + + memcpy (ch->write_buf, ptr, realsize); + + ch->write_count -= realsize; + ch->write_buf += realsize; + + return orig_realsize; +} + +static size_t +read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t realsize = size * nmemb; + + assert (ch->read_buf); + if (realsize > ch->read_count) + realsize = ch->read_count; + + memcpy (ptr, ch->read_buf, realsize); + + ch->read_count -= realsize; + ch->read_buf += realsize; + + return realsize; +} diff --git a/plugins/curl/scripts.c b/plugins/curl/scripts.c index 9b674fc83..786ff7503 100644 --- a/plugins/curl/scripts.c +++ b/plugins/curl/scripts.c @@ -86,8 +86,8 @@ static void error_from_tmpfile (const char *what, const char *tmpfile); /* This is called from any thread just before we make a curl request. * - * Because the thread model is NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS - * we can be assured of exclusive access to handle here. + * Because the curl handle must be obtained through get_handle() we + * can be assured of exclusive access here. */ int do_scripts (struct curl_handle *ch) diff --git a/tests/test-retry-request-mirror.c b/tests/test-retry-request-mirror.c index b1eb8af02..e7157184f 100644 --- a/tests/test-retry-request-mirror.c +++ b/tests/test-retry-request-mirror.c @@ -135,15 +135,6 @@ main (int argc, char *argv[]) } nbd_close (nbd); - - /* Reconnection in the next iteration of the loop will read from - * the mirror (because the curl plugin always makes a HEAD request - * in curl_open to read the size) which will flip the state, so we - * have to do it here. - */ - state++; - if (state == 3) - state = 1; } exit (EXIT_SUCCESS); -- 2.39.0
Richard W.M. Jones
2023-Feb-07 16:30 UTC
[Libguestfs] [PATCH nbdkit v2 3/4] curl: Complete implementation of the curl handle pool
This commit implements the curl handle pool. By default it can grow to up to 4 curl handles, shared between all NBD connections. You can change this using the new connections=N parameter. --- plugins/curl/nbdkit-curl-plugin.pod | 7 +++ plugins/curl/curldefs.h | 5 +- plugins/curl/curl.c | 11 ++++ plugins/curl/pool.c | 97 ++++++++++++++++++++++------- 4 files changed, 98 insertions(+), 22 deletions(-) diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod index a396de027..f4cd07068 100644 --- a/plugins/curl/nbdkit-curl-plugin.pod +++ b/plugins/curl/nbdkit-curl-plugin.pod @@ -52,6 +52,13 @@ that libcurl is compiled with. Set CA certificates directory location for libcurl. See L<CURLOPT_CAPATH(3)> for more information. +=item B<connections=>N + +Open up to C<N> curl connections to the web server. The default is 4. +Curl connections are shared between all NBD clients, so you may wish +to increase this if you expect many simultaneous clients (or a single +client using many multi-conn connections). + =item B<cookie=>COOKIE =item B<cookie=+>FILENAME diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 533c28c52..d3c616f0e 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -50,6 +50,7 @@ extern const char *url; extern const char *cainfo; extern const char *capath; +extern unsigned connections; extern char *cookie; extern const char *cookiefile; extern const char *cookiejar; @@ -84,7 +85,6 @@ extern int curl_debug_verbose; /* The per-connection handle. */ struct handle { int readonly; - struct curl_handle *ch; }; /* The libcurl handle and some associated fields and buffers. */ @@ -92,6 +92,9 @@ struct curl_handle { /* The underlying curl handle. */ CURL *c; + /* True if the handle is in use by a thread. */ + bool in_use; + /* These fields are used/initialized when we create the handle. */ bool accept_range; int64_t exportsize; diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 105de29c6..4634c7006 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -57,6 +57,7 @@ const char *url = NULL; /* required */ const char *cainfo = NULL; const char *capath = NULL; +unsigned connections = 4; char *cookie = NULL; const char *cookiefile = NULL; const char *cookiejar = NULL; @@ -206,6 +207,16 @@ curl_config (const char *key, const char *value) capath = value; } + else if (strcmp (key, "connections") == 0) { + if (nbdkit_parse_unsigned ("connections", value, + &connections) == -1) + return -1; + if (connections == 0) { + nbdkit_error ("connections parameter must not be 0"); + return -1; + } + } + else if (strcmp (key, "cookie") == 0) { free (cookie); if (nbdkit_read_password (value, &cookie) == -1) diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c index 00a7d381b..de02fea56 100644 --- a/plugins/curl/pool.c +++ b/plugins/curl/pool.c @@ -55,6 +55,7 @@ #include "ascii-ctype.h" #include "ascii-string.h" #include "cleanup.h" +#include "vector.h" #include "curldefs.h" @@ -66,30 +67,41 @@ } while (0) static struct curl_handle *allocate_handle (void); +static void free_handle (struct curl_handle *); static int debug_cb (CURL *handle, curl_infotype type, const char *data, size_t size, void *); static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); -/* In the current implementation there is only one handle. This lock - * prevents it from being used multiple times. - */ +/* This lock protects access to the curl_handles vector below. */ static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; -/* The single curl handle. NULL means not yet allocated. */ -static struct curl_handle *the_ch; +/* List of curl handles. This is allocated dynamically as more + * handles are requested. Currently it does not shrink. It may grow + * up to 'connections' in length. + */ +DEFINE_VECTOR_TYPE(curl_handle_list, struct curl_handle *) +static curl_handle_list curl_handles = empty_vector; + +/* The condition is used when the curl handles vector is full and + * we're waiting for a thread to put_handle. + */ +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +static size_t in_use = 0, waiting = 0; /* Close and free all handles in the pool. */ void free_all_handles (void) { - if (the_ch) { - curl_easy_cleanup (the_ch->c); - if (the_ch->headers_copy) - curl_slist_free_all (the_ch->headers_copy); - free (the_ch); - } + size_t i; + + nbdkit_debug ("free_all_handles: number of curl handles allocated: %zu", + curl_handles.len); + + for (i = 0; i < curl_handles.len; ++i) + free_handle (curl_handles.ptr[i]); + curl_handle_list_reset (&curl_handles); } /* Get a handle from the pool. @@ -99,25 +111,59 @@ free_all_handles (void) struct curl_handle * get_handle (void) { - int r; - - r = pthread_mutex_lock (&lock); - assert (r == 0); - if (!the_ch) { - the_ch = allocate_handle (); - if (!the_ch) { - pthread_mutex_unlock (&lock); + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); + size_t i; + struct curl_handle *ch; + + again: + /* Look for a handle which is not in_use. */ + for (i = 0; i < curl_handles.len; ++i) { + ch = curl_handles.ptr[i]; + if (!ch->in_use) { + ch->in_use = true; + in_use++; + return ch; + } + } + + /* If more connections are allowed, then allocate a new handle. */ + if (curl_handles.len < connections) { + ch = allocate_handle (); + if (ch == NULL) + return NULL; + if (curl_handle_list_append (&curl_handles, ch) == -1) { + free_handle (ch); return NULL; } + ch->in_use = true; + in_use++; + return ch; } - return the_ch; + + /* Otherwise we have run out of connections so we must wait until + * another thread calls put_handle. + */ + assert (in_use == connections); + waiting++; + while (in_use == connections) + pthread_cond_wait (&cond, &lock); + waiting--; + + goto again; } /* Return the handle to the pool. */ void put_handle (struct curl_handle *ch) { - pthread_mutex_unlock (&lock); + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); + + ch->in_use = false; + in_use--; + + /* Signal the next thread which is waiting. */ + if (waiting > 0) + pthread_cond_signal (&cond); } /* Allocate and initialize a new libcurl handle. */ @@ -484,3 +530,12 @@ read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) return realsize; } + +static void +free_handle (struct curl_handle *ch) +{ + curl_easy_cleanup (ch->c); + if (ch->headers_copy) + curl_slist_free_all (ch->headers_copy); + free (ch); +} -- 2.39.0
Richard W.M. Jones
2023-Feb-07 16:30 UTC
[Libguestfs] [PATCH nbdkit v2 4/4] curl: Use the parallel thread model
NOTE! THIS PESSIMIZES PERFORMANCE: https://listman.redhat.com/archives/libguestfs/2023-February/030618.html After previous changes, it is now safe to use the parallel thread model in this plugin. The locking in pool.c protects a single curl handle from being used from multiple threads. An advantage of this is we can now combine the curl plugin with filters such as readahead and scan. --- plugins/curl/curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 4634c7006..e7c0e18da 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -468,7 +468,7 @@ curl_close (void *handle) free (h); } -#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS +#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL /* Calls get_handle() ... put_handle() to get a handle for the length * of the current scope. -- 2.39.0
Laszlo Ersek
2023-Feb-09 14:44 UTC
[Libguestfs] [PATCH nbdkit v2 0/4] curl: Use a curl handle pool
On 2/7/23 17:29, Richard W.M. Jones wrote:> NOTE! At least patch 4 should not be applied, and maybe the whole > series is a bust. I am mainly posting this on the list for discussion > and maybe to archive it. > > Version 1 was here: > https://listman.redhat.com/archives/libguestfs/2023-February/030610.html > > This patch series introduces the concept of a pool of libcurl handles, > instead of always associating one libcurl handle with one NBD > connection. This gives you a bit more flexibility, eg. you can have a > highly concurrent multi-conn NBD connection, but not overwhelm the > remote web server with HTTP connections. Or vice versa. > > Compared to the earlier version, I have pushed a couple of simple > patches from the old series upstream. The remaining patches are > tidied up a bit and better tested, but are largely the same. > > Patches 1-3 on their own are performance neutral in the cases I tested > where you have approximately the same number of NBD connections as web > server connections (as expected). > > Patch 4 kills performance, for reasons discussed here: > https://listman.redhat.com/archives/libguestfs/2023-February/030618.html > I was not able to fix this although I have tried several approaches.(patches 1 through 3 are now upstream: 4ff1021ed85a..6c6b8c225ad3)