Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 0/9] curl: Use curl multi interface
This is a slightly cleaner version of the patch previously posted here: https://listman.redhat.com/archives/libguestfs/2023-July/032167.html Patch #8 is still irreducibly complex, but I can't see an easy way around that. I spent more time refining the commands that get passed to the worker thread so that now there are only two (EASY_HANDLE and STOP), since all operations can be expressed in terms of just those two commands. Test & valgrind all pass, and there are no missing features. Rich.
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 1/9] curl: Make times seconds field slightly wider
Updates: commit 68dddbeb584fb9385915846d259563f74338ffe8 --- plugins/curl/nbdkit-curl-plugin.pod | 14 +++++++------- plugins/curl/times.c | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod index 0fd688edd..0774adadc 100644 --- a/plugins/curl/nbdkit-curl-plugin.pod +++ b/plugins/curl/nbdkit-curl-plugin.pod @@ -551,13 +551,13 @@ information is printed in the debug output before nbdkit exits. The output will look like: nbdkit: debug: times (-D curl.times=1): - nbdkit: debug: name resolution : 0.128442 s - nbdkit: debug: connection : 4.945213 s - nbdkit: debug: SSL negotiation : 4.291362 s - nbdkit: debug: pretransfer : 0.104137 s - nbdkit: debug: first byte received : 56.115269 s - nbdkit: debug: data transfer : 222.633831 s - nbdkit: debug: redirection time : 0.000000 s + nbdkit: debug: name resolution : 0.128442 s + nbdkit: debug: connection : 4.945213 s + nbdkit: debug: SSL negotiation : 4.291362 s + nbdkit: debug: pretransfer : 0.104137 s + nbdkit: debug: first byte received : 56.115269 s + nbdkit: debug: data transfer : 222.633831 s + nbdkit: debug: redirection time : 0.000000 s The cumulative time taken to perform each step is shown (summed across all HTTP connections). The redirection time is the total time taken diff --git a/plugins/curl/times.c b/plugins/curl/times.c index 8cc4cf278..e752a0a9e 100644 --- a/plugins/curl/times.c +++ b/plugins/curl/times.c @@ -123,7 +123,7 @@ display_times (void) v = t; prev_t = t; - nbdkit_debug ("%-30s: %3" PRIi64 ".%06" PRIi64 " s", + nbdkit_debug ("%-30s: %4" PRIi64 ".%06" PRIi64 " s", times[i].name, v / 1000000, v % 1000000); } -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 2/9] curl: Rename unload_config, unload_pool -> config_unload, pool_unload
For consistency with scripts_unload. There is still a function called load_pool, but that inconsistency will be removed in a follow-on commit. --- plugins/curl/curldefs.h | 4 ++-- plugins/curl/config.c | 2 +- plugins/curl/curl.c | 4 ++-- plugins/curl/pool.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 9169b256d..eb168c524 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -106,13 +106,13 @@ struct curl_handle { extern int curl_config (const char *key, const char *value); extern int curl_config_complete (void); extern const char *curl_config_help; -extern void unload_config (void); +extern void config_unload (void); extern struct curl_handle *allocate_handle (void); extern void free_handle (struct curl_handle *); /* pool.c */ extern void load_pool (void); -extern void unload_pool (void); +extern void pool_unload (void); extern struct curl_handle *get_handle (void); extern void put_handle (struct curl_handle *ch); diff --git a/plugins/curl/config.c b/plugins/curl/config.c index 742d60809..b6e02b85a 100644 --- a/plugins/curl/config.c +++ b/plugins/curl/config.c @@ -100,7 +100,7 @@ static size_t error_cb (char *ptr, size_t size, size_t nmemb, void *opaque); NBDKIT_DLL_PUBLIC int curl_debug_verbose = 0; void -unload_config (void) +config_unload (void) { free (cookie); if (headers) diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 99a7e00b5..729710935 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -74,9 +74,9 @@ curl_load (void) static void curl_unload (void) { - unload_config (); + config_unload (); scripts_unload (); - unload_pool (); + pool_unload (); display_times (); curl_global_cleanup (); } diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c index 91e56f070..50a623a4e 100644 --- a/plugins/curl/pool.c +++ b/plugins/curl/pool.c @@ -86,7 +86,7 @@ load_pool (void) /* Close and free all handles in the pool. */ void -unload_pool (void) +pool_unload (void) { size_t i; -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 3/9] pool: Add outline get_ready and after_fork functions
In a forthcoming commit we will need to create a multi handle and a background thread, requiring use of the .get_ready (for multi) and .after_fork (for the thread) plugin methods. This commit removes the empty load_pool function and adds pool_get_ready and pool_after_fork, and the associated machinery in curl.c. This commit on its own does nothing, future commits will fill in these functions with useful work. --- plugins/curl/curldefs.h | 3 ++- plugins/curl/curl.c | 14 +++++++++++++- plugins/curl/pool.c | 12 +++++++++--- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index eb168c524..3c54b7c3c 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -111,7 +111,8 @@ extern struct curl_handle *allocate_handle (void); extern void free_handle (struct curl_handle *); /* pool.c */ -extern void load_pool (void); +extern int pool_get_ready (void); +extern int pool_after_fork (void); extern void pool_unload (void); extern struct curl_handle *get_handle (void); extern void put_handle (struct curl_handle *ch); diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 729710935..4e727b861 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -67,8 +67,18 @@ curl_load (void) nbdkit_error ("libcurl initialization failed: %d", (int) r); exit (EXIT_FAILURE); } +} - load_pool (); +int +curl_get_ready (void) +{ + return pool_get_ready (); +} + +int +curl_after_fork (void) +{ + return pool_after_fork (); } static void @@ -249,6 +259,8 @@ static struct nbdkit_plugin plugin = { */ //.config_help = curl_config_help, .magic_config_key = "url", + .get_ready = curl_get_ready, + .after_fork = curl_after_fork, .open = curl_open, .close = curl_close, .get_size = curl_get_size, diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c index 50a623a4e..eb2d330e1 100644 --- a/plugins/curl/pool.c +++ b/plugins/curl/pool.c @@ -78,10 +78,16 @@ static curl_handle_list curl_handles = empty_vector; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static size_t in_use = 0, waiting = 0; -/* Initialize pool structures. */ -void -load_pool (void) +int +pool_get_ready (void) { + return 0; +} + +int +pool_after_fork (void) +{ + return 0; } /* Close and free all handles in the pool. */ -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 4/9] curl: Do pool_unload before config_unload
Since config.c deals with handles (and contains free_handle), and since pool_unload calls free_handle to free handles, it's better to do pool_unload first. I don't believe this is a correctness issue now, but it will be in subsequent commits. --- plugins/curl/curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 4e727b861..be42de36f 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -84,9 +84,9 @@ curl_after_fork (void) static void curl_unload (void) { + pool_unload (); config_unload (); scripts_unload (); - pool_unload (); display_times (); curl_global_cleanup (); } -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 5/9] retry-request: Print operation we are retrying in debug messages
--- filters/retry-request/retry-request.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/filters/retry-request/retry-request.c b/filters/retry-request/retry-request.c index ed566080d..e5b8344cd 100644 --- a/filters/retry-request/retry-request.c +++ b/filters/retry-request/retry-request.c @@ -100,15 +100,15 @@ retry_request_config (nbdkit_next_config *next, nbdkit_backend *nxdata, * The code between RETRY_START...RETRY_END must set r to 0 or -1 on * success or failure. *err may also be implicitly assigned. */ -#define RETRY_START \ +#define RETRY_START(what) \ { \ unsigned i; \ \ r = -1; \ for (i = 0; r == -1 && i <= retries; ++i) { \ if (i > 0) { \ - nbdkit_debug ("retry %u: waiting %u seconds before retrying", \ - i, delay); \ + nbdkit_debug ("retry %u: waiting %u seconds before retrying %s",\ + i, delay, what); \ if (nbdkit_nanosleep (delay, 0) == -1) { \ if (*err == 0) \ *err = errno; \ @@ -130,7 +130,7 @@ retry_request_open (nbdkit_next_open *next, nbdkit_context *nxdata, if (retry_open_call) { int *err = &errno; /* used by the RETRY_* macros */ - RETRY_START + RETRY_START("open") r = next (nxdata, readonly, exportname); RETRY_END; } @@ -148,7 +148,7 @@ retry_request_pread (nbdkit_next *next, { int r; - RETRY_START + RETRY_START("pread") r = next->pread (next, buf, count, offset, flags, err); RETRY_END; return r; @@ -162,7 +162,7 @@ retry_request_pwrite (nbdkit_next *next, { int r; - RETRY_START + RETRY_START("pwrite") r = next->pwrite (next, buf, count, offset, flags, err); RETRY_END; return r; @@ -176,7 +176,7 @@ retry_request_trim (nbdkit_next *next, { int r; - RETRY_START + RETRY_START("trim") r = next->trim (next, count, offset, flags, err); RETRY_END; return r; @@ -189,7 +189,7 @@ retry_request_flush (nbdkit_next *next, { int r; - RETRY_START + RETRY_START("flush") r = next->flush (next, flags, err); RETRY_END; return r; @@ -203,7 +203,7 @@ retry_request_zero (nbdkit_next *next, { int r; - RETRY_START + RETRY_START("zero") r = next->zero (next, count, offset, flags, err); RETRY_END; return r; @@ -218,7 +218,7 @@ retry_request_extents (nbdkit_next *next, CLEANUP_EXTENTS_FREE struct nbdkit_extents *extents2 = NULL; int r; - RETRY_START { + RETRY_START("extents") { /* Each retry must begin with extents reset to the right beginning. */ nbdkit_extents_free (extents2); extents2 = nbdkit_extents_new (offset, next->get_size (next)); @@ -254,7 +254,7 @@ retry_request_cache (nbdkit_next *next, { int r; - RETRY_START + RETRY_START("cache") r = next->cache (next, count, offset, flags, err); RETRY_END; return r; -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 6/9] retry-request: Allow get_size operation to be retried
This plugin operation might need to do some real work (instead of just fetching a number from memory), and so it might have to be retried. In particular, changes to the curl plugin make .get_size into a heavyweight operation, where previously it was done as a side-effect of .open. And so we must allow .get_size to be retried independent of .open. --- filters/retry-request/retry-request.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/filters/retry-request/retry-request.c b/filters/retry-request/retry-request.c index e5b8344cd..8e3dd8246 100644 --- a/filters/retry-request/retry-request.c +++ b/filters/retry-request/retry-request.c @@ -141,6 +141,18 @@ retry_request_open (nbdkit_next_open *next, nbdkit_context *nxdata, return r == 0 ? NBDKIT_HANDLE_NOT_NEEDED : NULL; } +static int64_t +retry_request_get_size (nbdkit_next *next, void *handle) +{ + int64_t r; + int *err = &errno; /* used by the RETRY_* macros */ + + RETRY_START("get_size") + r = next->get_size (next); + RETRY_END; + return r; +} + static int retry_request_pread (nbdkit_next *next, void *handle, void *buf, uint32_t count, uint64_t offset, @@ -267,6 +279,7 @@ static struct nbdkit_filter filter = { .config = retry_request_config, .config_help = retry_request_config_help, .open = retry_request_open, + .get_size = retry_request_get_size, .pread = retry_request_pread, .pwrite = retry_request_pwrite, .trim = retry_request_trim, -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 7/9] tests/test-retry-request-mirror.c: Don't assume state after connect
After forthcoming changes to the curl plugin we cannot assume the exact mirror we will be connected to after making the NBD connection. So remove that assumption. See: commit 38dccd848bd40cccdf012df7a606e13282aaeecb --- tests/test-retry-request-mirror.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test-retry-request-mirror.c b/tests/test-retry-request-mirror.c index cf42c5964..65440d2e5 100644 --- a/tests/test-retry-request-mirror.c +++ b/tests/test-retry-request-mirror.c @@ -58,7 +58,7 @@ main (int argc, char *argv[]) const char *sockpath; CLEANUP_FREE char *usp_param = NULL; int i, j; - char state = 0; + char state; struct nbd_handle *nbd = NULL; #ifndef HAVE_CURLOPT_UNIX_SOCKET_PATH @@ -105,6 +105,8 @@ main (int argc, char *argv[]) if (nbd_connect_unix (nbd, sock /* NBD socket */) == -1) goto nbd_error; + state = 0; + for (i = 0; i < 7 /* not divisible by 2 or 3 */; ++i) { char buf[512]; -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 8/9] curl: Use curl multi interface
See the comment at the top of plugins/curl/pool.c for general information about how this works. This makes a very large difference to performance over the previous implementation. Note for the tests below I also applied the next commit changing the behaviour of the connections parameter. Using this test case: $ uri=https://cloud-images.ubuntu.com/lunar/current/lunar-server-cloudimg-amd64.img $ nbdkit -r -U - curl $uri ipresolve=v4 --run 'nbdcopy -p $uri null' The times are as follows: multi, connections=64 21.5s multi, connections=32 30.2s multi, connections=16 56.0s before this commit 166s --- plugins/curl/curldefs.h | 35 ++-- plugins/curl/config.c | 246 --------------------------- plugins/curl/curl.c | 366 +++++++++++++++++++++++++++++++++++----- plugins/curl/pool.c | 346 ++++++++++++++++++++++++++++--------- 4 files changed, 616 insertions(+), 377 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 3c54b7c3c..02b28a133 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -76,16 +76,6 @@ struct curl_handle { /* The underlying curl handle. */ CURL *c; - /* Index of this handle in the pool (for debugging). */ - size_t i; - - /* True if the handle is in use by a thread. */ - bool in_use; - - /* These fields are used/initialized when we create the handle. */ - bool accept_range; - int64_t exportsize; - char errbuf[CURL_ERROR_SIZE]; /* Before doing a read or write operation, set these to point to the @@ -98,8 +88,30 @@ struct curl_handle { const char *read_buf; uint32_t read_count; + /* This field is used by curl_get_size. */ + bool accept_range; + /* Used by scripts.c */ struct curl_slist *headers_copy; + + /* Used by pool.c */ + struct command *cmd; +}; + +/* Asynchronous commands that can be sent to the pool thread. */ +enum command_type { EASY_HANDLE, STOP }; +struct command { + /* These fields are set by the caller. */ + enum command_type type; /* command */ + struct curl_handle *ch; /* for EASY_HANDLE, the easy handle */ + + /* This field is set to a unique value by send_command_and_wait. */ + uint64_t id; /* serial number */ + + /* These fields are used to signal back that the command finished. */ + pthread_mutex_t mutex; /* completion mutex */ + pthread_cond_t cond; /* completion condition */ + CURLcode status; /* status code (CURLE_OK = succeeded) */ }; /* config.c */ @@ -114,8 +126,7 @@ extern void free_handle (struct curl_handle *); extern int pool_get_ready (void); extern int pool_after_fork (void); extern void pool_unload (void); -extern struct curl_handle *get_handle (void); -extern void put_handle (struct curl_handle *ch); +extern CURLcode send_command_and_wait (struct command *cmd); /* scripts.c */ extern int do_scripts (struct curl_handle *ch); diff --git a/plugins/curl/config.c b/plugins/curl/config.c index b6e02b85a..46bec2bbb 100644 --- a/plugins/curl/config.c +++ b/plugins/curl/config.c @@ -48,8 +48,6 @@ #include <nbdkit-plugin.h> -#include "ascii-ctype.h" -#include "ascii-string.h" #include "cleanup.h" #include "curldefs.h" @@ -89,12 +87,6 @@ static const char *user_agent = NULL; static int debug_cb (CURL *handle, curl_infotype type, const char *data, size_t size, void *); -static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); -static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); -static int get_content_length_accept_range (struct curl_handle *ch); -static bool try_fallback_GET_method (struct curl_handle *ch); -static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); -static size_t error_cb (char *ptr, size_t size, size_t nmemb, void *opaque); /* Use '-D curl.verbose=1' to set. */ NBDKIT_DLL_PUBLIC int curl_debug_verbose = 0; @@ -668,17 +660,9 @@ allocate_handle (void) if (user_agent) curl_easy_setopt (ch->c, CURLOPT_USERAGENT, user_agent); - if (get_content_length_accept_range (ch) == -1) - goto err; - /* Get set up for reading and writing. */ curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, NULL); curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, NULL); - curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, write_cb); - curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); - /* These are only used if !readonly but we always register them. */ - curl_easy_setopt (ch->c, CURLOPT_READFUNCTION, read_cb); - curl_easy_setopt (ch->c, CURLOPT_READDATA, ch); return ch; @@ -742,233 +726,3 @@ debug_cb (CURL *handle, curl_infotype type, out: return 0; } - -/* NB: The terminology used by libcurl is confusing! - * - * WRITEFUNCTION / write_cb is used when reading from the remote server - * READFUNCTION / read_cb is used when writing to the remote server. - * - * We use the same terminology as libcurl here. - */ - -static size_t -write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t orig_realsize = size * nmemb; - size_t realsize = orig_realsize; - - assert (ch->write_buf); - - /* Don't read more than the requested amount of data, even if the - * server or libcurl sends more. - */ - if (realsize > ch->write_count) - realsize = ch->write_count; - - memcpy (ch->write_buf, ptr, realsize); - - ch->write_count -= realsize; - ch->write_buf += realsize; - - return orig_realsize; -} - -static size_t -read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t realsize = size * nmemb; - - assert (ch->read_buf); - if (realsize > ch->read_count) - realsize = ch->read_count; - - memcpy (ptr, ch->read_buf, realsize); - - ch->read_count -= realsize; - ch->read_buf += realsize; - - return realsize; -} - -/* Get the file size and also whether the remote HTTP server - * supports byte ranges. - */ -static int -get_content_length_accept_range (struct curl_handle *ch) -{ - CURLcode r; - long code; -#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - curl_off_t o; -#else - double d; -#endif - - /* We must run the scripts if necessary and set headers in the - * handle. - */ - if (do_scripts (ch) == -1) - return -1; - - /* Set this flag in the handle to false. The callback should set it - * to true if byte ranges are supported, which we check below. - */ - ch->accept_range = false; - - /* No Body, not nobody! This forces a HEAD request. */ - curl_easy_setopt (ch->c, CURLOPT_NOBODY, 1L); - curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); - curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); - r = curl_easy_perform (ch->c); - update_times (ch->c); - if (r != CURLE_OK) { - display_curl_error (ch, r, - "problem doing HEAD request to fetch size of URL [%s]", - url); - - /* Get the HTTP status code, if available. */ - r = curl_easy_getinfo (ch->c, CURLINFO_RESPONSE_CODE, &code); - if (r == CURLE_OK) - nbdkit_debug ("HTTP status code: %ld", code); - else - code = -1; - - /* See comment on try_fallback_GET_method below. */ - if (code != 403 || !try_fallback_GET_method (ch)) - return -1; - } - - /* Get the content length. - * - * Note there is some subtlety here: For web servers using chunked - * encoding, either the Content-Length header will not be present, - * or if present it should be ignored. (For such servers the only - * way to find out the true length would be to read all of the - * content, which we don't want to do). - * - * Curl itself resolves this for us. It will ignore the - * Content-Length header if chunked encoding is used, returning the - * length as -1 which we check below (see also - * curl:lib/http.c:Curl_http_size). - */ -#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); - if (r != CURLE_OK) { - display_curl_error (ch, r, - "could not get length of remote file [%s]", url); - return -1; - } - - if (o == -1) { - nbdkit_error ("could not get length of remote file [%s], " - "is the URL correct?", url); - return -1; - } - - ch->exportsize = o; -#else - r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); - if (r != CURLE_OK) { - display_curl_error (ch, r, - "could not get length of remote file [%s]", url); - return -1; - } - - if (d == -1) { - nbdkit_error ("could not get length of remote file [%s], " - "is the URL correct?", url); - return -1; - } - - ch->exportsize = d; -#endif - nbdkit_debug ("content length: %" PRIi64, ch->exportsize); - - /* If this is HTTP, check that byte ranges are supported. */ - if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || - ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { - if (!ch->accept_range) { - nbdkit_error ("server does not support 'range' (byte range) requests"); - return -1; - } - - nbdkit_debug ("accept range supported (for HTTP/HTTPS)"); - } - - return 0; -} - -/* S3 servers can return 403 Forbidden for HEAD but still respond - * to GET, so we give it a second chance in that case. - * https://github.com/kubevirt/containerized-data-importer/issues/2737 - * - * This function issues a GET request with a writefunction that always - * returns an error, thus effectively getting the headers but - * abandoning the transfer as soon as possible after. - */ -static bool -try_fallback_GET_method (struct curl_handle *ch) -{ - CURLcode r; - - nbdkit_debug ("attempting to fetch headers using GET method"); - - curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L); - curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); - curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); - curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, error_cb); - curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); - r = curl_easy_perform (ch->c); - update_times (ch->c); - - /* We expect CURLE_WRITE_ERROR here, but CURLE_OK is possible too - * (eg if the remote has zero length). Other errors might happen - * but we ignore them since it is a fallback path. - */ - return r == CURLE_OK || r == CURLE_WRITE_ERROR; -} - -static size_t -header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t realsize = size * nmemb; - const char *header = ptr; - const char *end = header + realsize; - const char *accept_ranges = "accept-ranges:"; - const char *bytes = "bytes"; - - if (realsize >= strlen (accept_ranges) && - ascii_strncasecmp (header, accept_ranges, strlen (accept_ranges)) == 0) { - const char *p = strchr (header, ':') + 1; - - /* Skip whitespace between the header name and value. */ - while (p < end && *p && ascii_isspace (*p)) - p++; - - if (end - p >= strlen (bytes) - && strncmp (p, bytes, strlen (bytes)) == 0) { - /* Check that there is nothing but whitespace after the value. */ - p += strlen (bytes); - while (p < end && *p && ascii_isspace (*p)) - p++; - - if (p == end || !*p) - ch->accept_range = true; - } - } - - return realsize; -} - -static size_t -error_cb (char *ptr, size_t size, size_t nmemb, void *opaque) -{ -#ifdef CURL_WRITEFUNC_ERROR - return CURL_WRITEFUNC_ERROR; -#else - return 0; /* in older curl, any size < requested will also be an error */ -#endif -} diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index be42de36f..28cc7bbe6 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -48,7 +48,8 @@ #include <nbdkit-plugin.h> -#include "cleanup.h" +#include "ascii-ctype.h" +#include "ascii-string.h" #include "curldefs.h" @@ -118,32 +119,6 @@ curl_close (void *handle) #define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL -/* Calls get_handle() ... put_handle() to get a handle for the length - * of the current scope. - */ -#define GET_HANDLE_FOR_CURRENT_SCOPE(ch) \ - CLEANUP_PUT_HANDLE struct curl_handle *ch = get_handle (); -#define CLEANUP_PUT_HANDLE __attribute__ ((cleanup (cleanup_put_handle))) -static void -cleanup_put_handle (void *chp) -{ - struct curl_handle *ch = * (struct curl_handle **) chp; - - if (ch != NULL) - put_handle (ch); -} - -/* Get the file size. */ -static int64_t -curl_get_size (void *handle) -{ - GET_HANDLE_FOR_CURRENT_SCOPE (ch); - if (ch == NULL) - return -1; - - return ch->exportsize; -} - /* Multi-conn is safe for read-only connections, but HTTP does not * have any concept of flushing so we cannot use it for read-write * connections. @@ -156,23 +131,253 @@ curl_can_multi_conn (void *handle) return !! h->readonly; } +/* Get the file size. */ +static int get_content_length_accept_range (struct curl_handle *ch); +static bool try_fallback_GET_method (struct curl_handle *ch); +static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); +static size_t error_cb (char *ptr, size_t size, size_t nmemb, void *opaque); + +static int64_t +curl_get_size (void *handle) +{ + struct curl_handle *ch; + CURLcode r; + long code; +#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + curl_off_t o; +#else + double d; +#endif + int64_t exportsize; + + /* Get a curl easy handle. */ + ch = allocate_handle (); + if (ch == NULL) goto err; + + /* Prepare to read the headers. */ + if (get_content_length_accept_range (ch) == -1) + goto err; + + /* Send the command to the worker thread and wait. */ + struct command cmd = { + .type = EASY_HANDLE, + .ch = ch, + }; + + r = send_command_and_wait (&cmd); + update_times (ch->c); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "problem doing HEAD request to fetch size of URL [%s]", + url); + + /* Get the HTTP status code, if available. */ + r = curl_easy_getinfo (ch->c, CURLINFO_RESPONSE_CODE, &code); + if (r == CURLE_OK) + nbdkit_debug ("HTTP status code: %ld", code); + else + code = -1; + + /* See comment on try_fallback_GET_method below. */ + if (code != 403 || !try_fallback_GET_method (ch)) + goto err; + } + + /* Get the content length. + * + * Note there is some subtlety here: For web servers using chunked + * encoding, either the Content-Length header will not be present, + * or if present it should be ignored. (For such servers the only + * way to find out the true length would be to read all of the + * content, which we don't want to do). + * + * Curl itself resolves this for us. It will ignore the + * Content-Length header if chunked encoding is used, returning the + * length as -1 which we check below (see also + * curl:lib/http.c:Curl_http_size). + */ +#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "could not get length of remote file [%s]", url); + goto err; + } + + if (o == -1) { + nbdkit_error ("could not get length of remote file [%s], " + "is the URL correct?", url); + goto err; + } + + exportsize = o; +#else + r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "could not get length of remote file [%s]", url); + goto err; + } + + if (d == -1) { + nbdkit_error ("could not get length of remote file [%s], " + "is the URL correct?", url); + goto err; + } + + exportsize = d; +#endif + nbdkit_debug ("content length: %" PRIi64, exportsize); + + /* If this is HTTP, check that byte ranges are supported. */ + if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || + ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { + if (!ch->accept_range) { + nbdkit_error ("server does not support 'range' (byte range) requests"); + goto err; + } + + nbdkit_debug ("accept range supported (for HTTP/HTTPS)"); + } + + free_handle (ch); + return exportsize; + + err: + if (ch) + free_handle (ch); + return -1; +} + +/* Get the file size and also whether the remote HTTP server + * supports byte ranges. + */ +static int +get_content_length_accept_range (struct curl_handle *ch) +{ + /* We must run the scripts if necessary and set headers in the + * handle. + */ + if (do_scripts (ch) == -1) + return -1; + + /* Set this flag in the handle to false. The callback should set it + * to true if byte ranges are supported, which we check below. + */ + ch->accept_range = false; + + /* No Body, not nobody! This forces a HEAD request. */ + curl_easy_setopt (ch->c, CURLOPT_NOBODY, 1L); + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, NULL); + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, NULL); + curl_easy_setopt (ch->c, CURLOPT_READFUNCTION, NULL); + curl_easy_setopt (ch->c, CURLOPT_READDATA, NULL); + return 0; +} + +/* S3 servers can return 403 Forbidden for HEAD but still respond + * to GET, so we give it a second chance in that case. + * https://github.com/kubevirt/containerized-data-importer/issues/2737 + * + * This function issues a GET request with a writefunction that always + * returns an error, thus effectively getting the headers but + * abandoning the transfer as soon as possible after. + */ +static bool +try_fallback_GET_method (struct curl_handle *ch) +{ + CURLcode r; + + nbdkit_debug ("attempting to fetch headers using GET method"); + + curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L); + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, error_cb); + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); + + struct command cmd = { + .type = EASY_HANDLE, + .ch = ch, + }; + + r = send_command_and_wait (&cmd); + update_times (ch->c); + + /* We expect CURLE_WRITE_ERROR here, but CURLE_OK is possible too + * (eg if the remote has zero length). Other errors might happen + * but we ignore them since it is a fallback path. + */ + return r == CURLE_OK || r == CURLE_WRITE_ERROR; +} + +static size_t +header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t realsize = size * nmemb; + const char *header = ptr; + const char *end = header + realsize; + const char *accept_ranges = "accept-ranges:"; + const char *bytes = "bytes"; + + if (realsize >= strlen (accept_ranges) && + ascii_strncasecmp (header, accept_ranges, strlen (accept_ranges)) == 0) { + const char *p = strchr (header, ':') + 1; + + /* Skip whitespace between the header name and value. */ + while (p < end && *p && ascii_isspace (*p)) + p++; + + if (end - p >= strlen (bytes) + && strncmp (p, bytes, strlen (bytes)) == 0) { + /* Check that there is nothing but whitespace after the value. */ + p += strlen (bytes); + while (p < end && *p && ascii_isspace (*p)) + p++; + + if (p == end || !*p) + ch->accept_range = true; + } + } + + return realsize; +} + +static size_t +error_cb (char *ptr, size_t size, size_t nmemb, void *opaque) +{ +#ifdef CURL_WRITEFUNC_ERROR + return CURL_WRITEFUNC_ERROR; +#else + return 0; /* in older curl, any size < requested will also be an error */ +#endif +} + /* Read data from the remote server. */ +static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); + static int curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) { CURLcode r; + struct curl_handle *ch; char range[128]; - GET_HANDLE_FOR_CURRENT_SCOPE (ch); - if (ch == NULL) - return -1; + /* Get a curl easy handle. */ + ch = allocate_handle (); + if (ch == NULL) goto err; /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (ch) == -1) return -1; + if (do_scripts (ch) == -1) goto err; /* Tell the write_cb where we want the data to be written. write_cb * will update this if the data comes in multiple sections. */ + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); ch->write_buf = buf; ch->write_count = count; @@ -183,11 +388,16 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) offset, offset + count); curl_easy_setopt (ch->c, CURLOPT_RANGE, range); - /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (ch->c); + /* Send the command to the worker thread and wait. */ + struct command cmd = { + .type = EASY_HANDLE, + .ch = ch, + }; + + r = send_command_and_wait (&cmd); if (r != CURLE_OK) { - display_curl_error (ch, r, "pread: curl_easy_perform"); - return -1; + display_curl_error (ch, r, "pread"); + goto err; } update_times (ch->c); @@ -198,26 +408,67 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) /* As far as I understand the cURL API, this should never happen. */ assert (ch->write_count == 0); + free_handle (ch); return 0; + + err: + if (ch) + free_handle (ch); + return -1; +} + +/* NB: The terminology used by libcurl is confusing! + * + * WRITEFUNCTION / write_cb is used when reading from the remote server + * READFUNCTION / read_cb is used when writing to the remote server. + * + * We use the same terminology as libcurl here. + */ +static size_t +write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t orig_realsize = size * nmemb; + size_t realsize = orig_realsize; + + assert (ch->write_buf); + + /* Don't read more than the requested amount of data, even if the + * server or libcurl sends more. + */ + if (realsize > ch->write_count) + realsize = ch->write_count; + + memcpy (ch->write_buf, ptr, realsize); + + ch->write_count -= realsize; + ch->write_buf += realsize; + + return orig_realsize; } /* Write data to the remote server. */ +static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); + static int curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) { CURLcode r; + struct curl_handle *ch; char range[128]; - GET_HANDLE_FOR_CURRENT_SCOPE (ch); - if (ch == NULL) - return -1; + /* Get a curl easy handle. */ + ch = allocate_handle (); + if (ch == NULL) goto err; /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (ch) == -1) return -1; + if (do_scripts (ch) == -1) goto err; /* Tell the read_cb where we want the data to be read from. read_cb * will update this if the data comes in multiple sections. */ + curl_easy_setopt (ch->c, CURLOPT_READFUNCTION, read_cb); + curl_easy_setopt (ch->c, CURLOPT_READDATA, ch); ch->read_buf = buf; ch->read_count = count; @@ -228,11 +479,16 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) offset, offset + count); curl_easy_setopt (ch->c, CURLOPT_RANGE, range); - /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (ch->c); + /* Send the command to the worker thread and wait. */ + struct command cmd = { + .type = EASY_HANDLE, + .ch = ch, + }; + + r = send_command_and_wait (&cmd); if (r != CURLE_OK) { - display_curl_error (ch, r, "pwrite: curl_easy_perform"); - return -1; + display_curl_error (ch, r, "pwrite"); + goto err; } update_times (ch->c); @@ -243,7 +499,31 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) /* As far as I understand the cURL API, this should never happen. */ assert (ch->read_count == 0); + free_handle (ch); return 0; + + err: + if (ch) + free_handle (ch); + return -1; +} + +static size_t +read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t realsize = size * nmemb; + + assert (ch->read_buf); + if (realsize > ch->read_count) + realsize = ch->read_count; + + memcpy (ptr, ch->read_buf, realsize); + + ch->read_count -= realsize; + ch->read_buf += realsize; + + return realsize; } static struct nbdkit_plugin plugin = { diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c index eb2d330e1..2974cda3f 100644 --- a/plugins/curl/pool.c +++ b/plugins/curl/pool.c @@ -30,11 +30,29 @@ * SUCH DAMAGE. */ -/* Curl handle pool. +/* Worker thread which processes the curl multi interface. * - * To get a libcurl handle, call get_handle(). When you hold the - * handle, it is yours exclusively to use. After you have finished - * with the handle, put it back into the pool by calling put_handle(). + * The main nbdkit threads (see curl.c) create curl easy handles + * initialized with the work they want to carry out. Note there is + * one easy handle per task (eg. per pread/pwrite request). The easy + * handles are not reused. + * + * The commands + optional easy handle are submitted to the worker + * thread over a self-pipe (it's easy to use a pipe here because the + * way curl multi works is it can listen on an extra fd, but not on + * anything else like a pthread condition). The curl multi performs + * the work of the outstanding easy handles. + * + * When an easy handle finishes work or errors, we retire the command + * by signalling back to the waiting nbdkit thread using a pthread + * condition. + * + * In my experiments, we're almost always I/O bound so I haven't seen + * any strong need to use more than one curl multi / worker thread, + * although it would be possible to add more in future. + * + * See also this extremely useful thread: + * https://curl.se/mail/lib-2019-03/0100.html */ #include <config.h> @@ -45,9 +63,19 @@ #include <stdint.h> #include <inttypes.h> #include <string.h> +#include <unistd.h> #include <assert.h> #include <pthread.h> +#ifdef HAVE_STDATOMIC_H +#include <stdatomic.h> +#else +/* Some old platforms lack atomic types, but 32 bit ints are usually + * "atomic enough". + */ +#define _Atomic /**/ +#endif + #include <curl/curl.h> #include <nbdkit-plugin.h> @@ -62,115 +90,281 @@ NBDKIT_DLL_PUBLIC int curl_debug_pool = 0; unsigned connections = 4; -/* This lock protects access to the curl_handles vector below. */ -static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; +/* Pipe used to notify background thread that a command is pending in + * the queue. A pointer to the 'struct command' is sent over the + * pipe. + */ +static int self_pipe[2] = { -1, -1 }; -/* List of curl handles. This is allocated dynamically as more - * handles are requested. Currently it does not shrink. It may grow - * up to 'connections' in length. +/* The curl multi handle. */ +static CURLM *multi; + +/* List of running easy handles. We only need to maintain this so we + * can remove them from the multi handle when cleaning up. */ DEFINE_VECTOR_TYPE (curl_handle_list, struct curl_handle *); static curl_handle_list curl_handles = empty_vector; -/* The condition is used when the curl handles vector is full and - * we're waiting for a thread to put_handle. - */ -static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; -static size_t in_use = 0, waiting = 0; +static const char * +command_type_to_string (enum command_type type) +{ + switch (type) { + case EASY_HANDLE: return "EASY_HANDLE"; + case STOP: return "STOP"; + default: abort (); + } +} int pool_get_ready (void) { + multi = curl_multi_init (); + if (multi == NULL) { + nbdkit_error ("curl_multi_init failed: %m"); + return -1; + } + return 0; } +/* Start and stop the background thread. */ +static pthread_t thread; +static bool thread_running; +static void *pool_worker (void *); + int pool_after_fork (void) { + int err; + + if (pipe (self_pipe) == -1) { + nbdkit_error ("pipe: %m"); + return -1; + } + + /* Start the pool background thread where all the curl work is done. */ + err = pthread_create (&thread, NULL, pool_worker, NULL); + if (err != 0) { + errno = err; + nbdkit_error ("pthread_create: %m"); + return -1; + } + thread_running = true; + return 0; } -/* Close and free all handles in the pool. */ +/* Unload the background thread. */ void pool_unload (void) { - size_t i; + if (thread_running) { + /* Stop the background thread. */ + struct command cmd = { .type = STOP }; + send_command_and_wait (&cmd); + pthread_join (thread, NULL); + thread_running = false; + } - if (curl_debug_pool) - nbdkit_debug ("unload_pool: number of curl handles allocated: %zu", - curl_handles.len); + if (self_pipe[0] >= 0) { + close (self_pipe[0]); + self_pipe[0] = -1; + } + if (self_pipe[1] >= 0) { + close (self_pipe[1]); + self_pipe[1] = -1; + } - for (i = 0; i < curl_handles.len; ++i) - free_handle (curl_handles.ptr[i]); - curl_handle_list_reset (&curl_handles); + if (multi) { + size_t i; + + /* Remove and free any easy handles in the multi. */ + for (i = 0; i < curl_handles.len; ++i) { + curl_multi_remove_handle (multi, curl_handles.ptr[i]->c); + free_handle (curl_handles.ptr[i]); + } + + curl_multi_cleanup (multi); + multi = NULL; + } } -/* Get a handle from the pool. - * - * It is owned exclusively by the caller until they call put_handle. +/* Command queue. */ +static _Atomic uint64_t id; /* next command ID */ + +/* Send command to the background thread and wait for completion. + * This is only called by one of the nbdkit threads. */ -struct curl_handle * -get_handle (void) +CURLcode +send_command_and_wait (struct command *cmd) { - ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); - size_t i; - struct curl_handle *ch; - - again: - /* Look for a handle which is not in_use. */ - for (i = 0; i < curl_handles.len; ++i) { - ch = curl_handles.ptr[i]; - if (!ch->in_use) { - ch->in_use = true; - in_use++; + cmd->id = id++; + + /* CURLcode is 0 (CURLE_OK) or > 0, so use -1 as a sentinel to + * indicate that the command has not yet been completed and status + * set. + */ + cmd->status = -1; + + /* This will be used to signal command completion back to us. */ + pthread_mutex_init (&cmd->mutex, NULL); + pthread_cond_init (&cmd->cond, NULL); + + /* Send the command to the background thread. */ + if (write (self_pipe[1], &cmd, sizeof cmd) != sizeof cmd) + abort (); + + /* Wait for the command to be completed by the background thread. */ + { + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex); + while (cmd->status == -1) /* for -1, see above */ + pthread_cond_wait (&cmd->cond, &cmd->mutex); + } + + pthread_mutex_destroy (&cmd->mutex); + pthread_cond_destroy (&cmd->cond); + + /* Note the main thread must call nbdkit_error on error! */ + return cmd->status; +} + +/* The background thread. */ +static void check_for_finished_handles (void); +static void retire_command (struct command *cmd, CURLcode code); +static void do_easy_handle (struct command *cmd); + +static void * +pool_worker (void *vp) +{ + bool stop = false; + + if (curl_debug_pool) + nbdkit_debug ("curl: background thread started"); + + while (!stop) { + struct command *cmd = NULL; + struct curl_waitfd extra_fds[1] + { { .fd = self_pipe[0], .events = CURL_WAIT_POLLIN } }; + CURLMcode mc; + int numfds, running_handles, repeats = 0; + + do { + /* Process the multi handle. */ + mc = curl_multi_perform (multi, &running_handles); + if (mc != CURLM_OK) { + nbdkit_error ("curl_multi_perform: %s", curl_multi_strerror (mc)); + abort (); /* XXX We don't expect this to happen */ + } + + check_for_finished_handles (); + + mc = curl_multi_wait (multi, extra_fds, 1, 1000000, &numfds); + if (mc != CURLM_OK) { + nbdkit_error ("curl_multi_wait: %s", curl_multi_strerror (mc)); + abort (); /* XXX We don't expect this to happen */ + } + if (curl_debug_pool) - nbdkit_debug ("get_handle: %zu", ch->i); - return ch; - } - } + nbdkit_debug ("curl_multi_wait returned: running_handles=%d numfds=%d", + running_handles, numfds); + + if (numfds == 0) { + repeats++; + if (repeats > 1) + nbdkit_nanosleep (1, 0); + } + else { + repeats = 0; + if (extra_fds[0].revents == CURL_WAIT_POLLIN) { + /* There's a command waiting. */ + if (read (self_pipe[0], &cmd, sizeof cmd) != sizeof cmd) + abort (); + } + } + } while (!cmd); - /* If more connections are allowed, then allocate a new handle. */ - if (curl_handles.len < connections) { - ch = allocate_handle (); - if (ch == NULL) - return NULL; - if (curl_handle_list_append (&curl_handles, ch) == -1) { - free_handle (ch); - return NULL; - } - ch->i = curl_handles.len - 1; - ch->in_use = true; - in_use++; if (curl_debug_pool) - nbdkit_debug ("get_handle: %zu", ch->i); - return ch; - } + nbdkit_debug ("curl: dispatching %s command %" PRIu64, + command_type_to_string (cmd->type), cmd->id); + + switch (cmd->type) { + case STOP: + stop = true; + retire_command (cmd, CURLE_OK); + break; - /* Otherwise we have run out of connections so we must wait until - * another thread calls put_handle. - */ - assert (in_use == connections); - waiting++; - while (in_use == connections) - pthread_cond_wait (&cond, &lock); - waiting--; + case EASY_HANDLE: + do_easy_handle (cmd); + break; + } + } /* while (!stop) */ - goto again; + if (curl_debug_pool) + nbdkit_debug ("curl: background thread stopped"); + + return NULL; } -/* Return the handle to the pool. */ -void -put_handle (struct curl_handle *ch) +/* This checks if any easy handles in the multi have + * finished and retires the associated commands. + */ +static void +check_for_finished_handles (void) { - ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); + CURLMsg *msg; + int msgs_in_queue; + + while ((msg = curl_multi_info_read (multi, &msgs_in_queue)) != NULL) { + size_t i; + struct curl_handle *ch = NULL; + + if (msg->msg == CURLMSG_DONE) { + /* Find this curl_handle. */ + for (i = 0; i < curl_handles.len; ++i) { + if (curl_handles.ptr[i]->c == msg->easy_handle) { + ch = curl_handles.ptr[i]; + curl_handle_list_remove (&curl_handles, i); + } + } + if (ch == NULL) abort (); + curl_multi_remove_handle (multi, ch->c); + + retire_command (ch->cmd, msg->data.result); + } + } +} +/* Retire a command. status is a CURLcode. */ +static void +retire_command (struct command *cmd, CURLcode status) +{ if (curl_debug_pool) - nbdkit_debug ("put_handle: %zu", ch->i); + nbdkit_debug ("curl: retiring %s command %" PRIu64, + command_type_to_string (cmd->type), cmd->id); + + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&cmd->mutex); + cmd->status = status; + pthread_cond_signal (&cmd->cond); +} + +static void +do_easy_handle (struct command *cmd) +{ + CURLMcode mc; + + cmd->ch->cmd = cmd; + + /* Add the handle to the multi. */ + mc = curl_multi_add_handle (multi, cmd->ch->c); + if (mc != CURLM_OK) { + nbdkit_error ("curl_multi_add_handle: %s", curl_multi_strerror (mc)); + goto err; + } - ch->in_use = false; - in_use--; + if (curl_handle_list_append (&curl_handles, cmd->ch) == -1) + goto err; + return; - /* Signal the next thread which is waiting. */ - if (waiting > 0) - pthread_cond_signal (&cond); + err: + retire_command (cmd, CURLE_OUT_OF_MEMORY); } -- 2.41.0
Richard W.M. Jones
2023-Jul-28 17:17 UTC
[Libguestfs] [PATCH nbdkit v2 9/9] curl: Redefine connections=<N> parameter as number of HTTP connections
Previously (nbdkit 1.34) this was the number of easy handles. However it turns out that easy handles can open multiple HTTP connections, and in fact there's no good way to tell how many (and they are not shared). Now that we are using a curl multi, curl >= 7.30 provides a way to limit the total number of actual HTTP connections, so we should just use it. This is closer to what I intended this parameter to mean. Link: https://curl.se/mail/lib-2019-03/0102.html Link: https://curl.se/mail/lib-2019-12/0044.html --- plugins/curl/nbdkit-curl-plugin.pod | 13 +++++++++---- plugins/curl/curldefs.h | 3 +++ plugins/curl/config.c | 2 +- plugins/curl/pool.c | 6 +++++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod index 0774adadc..8223d6da6 100644 --- a/plugins/curl/nbdkit-curl-plugin.pod +++ b/plugins/curl/nbdkit-curl-plugin.pod @@ -58,10 +58,10 @@ L<CURLOPT_CAPATH(3)> for more information. (nbdkit E<ge> 1.34) -Open up to C<N> curl connections to the web server. The default is 4. -Curl connections are shared between all NBD clients, so you may wish -to increase this if you expect many simultaneous NBD clients (or a -single client using many multi-conn connections). +Open up to C<N> connections to the web server. The default is 16. +Connections are shared between all NBD clients, so you may wish to +increase this if you expect many simultaneous NBD clients (or a single +client using many multi-conn connections). See L</NBD CONNECTIONS AND CURL HANDLES> below. @@ -407,6 +407,11 @@ of curl handles in the pool with the C<connections> parameter (default 4). Note that if there are more than 4 NBD connections, they will share the 4 web server connections, unless you adjust C<connections>. +nbdkit E<ge> 1.36 changed this again to use a curl multi handle, which +is more efficient especially with HTTP/2 and HTTP/3. Now the +C<connections> parameter controls the maximum number of HTTP +connections made to the remote server, and the default is 16. + =head1 HEADER AND COOKIE SCRIPTS While the C<header> and C<cookie> parameters can be used to specify diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 02b28a133..5a2cfab8e 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -41,6 +41,9 @@ * macro isn't present then Curl is very old. */ #ifdef CURL_AT_LEAST_VERSION +#if CURL_AT_LEAST_VERSION (7, 30, 0) +#define HAVE_CURLMOPT_MAX_TOTAL_CONNECTIONS +#endif #if CURL_AT_LEAST_VERSION (7, 55, 0) #define HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T #endif diff --git a/plugins/curl/config.c b/plugins/curl/config.c index 46bec2bbb..3e55cf15e 100644 --- a/plugins/curl/config.c +++ b/plugins/curl/config.c @@ -491,7 +491,7 @@ curl_config_complete (void) const char *curl_config_help "cainfo=<CAINFO> Path to Certificate Authority file.\n" "capath=<CAPATH> Path to directory with CA certificates.\n" - "connections=<N> Number of libcurl connections to use.\n" + "connections=<N> Number of HTTP connections to use.\n" "cookie=<COOKIE> Set HTTP/HTTPS cookies.\n" "cookiefile= Enable cookie processing.\n" "cookiefile=<FILENAME> Read cookies from file.\n" diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c index 2974cda3f..99bd160a9 100644 --- a/plugins/curl/pool.c +++ b/plugins/curl/pool.c @@ -88,7 +88,7 @@ /* Use '-D curl.pool=1' to debug handle pool. */ NBDKIT_DLL_PUBLIC int curl_debug_pool = 0; -unsigned connections = 4; +unsigned connections = 16; /* Pipe used to notify background thread that a command is pending in * the queue. A pointer to the 'struct command' is sent over the @@ -124,6 +124,10 @@ pool_get_ready (void) return -1; } +#ifdef HAVE_CURLMOPT_MAX_TOTAL_CONNECTIONS + curl_multi_setopt(multi, CURLMOPT_MAX_TOTAL_CONNECTIONS, (long) connections); +#endif + return 0; } -- 2.41.0