Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 0/6] curl: Use a curl handle pool
This experimental series changes the way that the curl plugin deals with libcurl handles. It also changes the thread model of the plugin from SERIALIZE_REQUESTS to PARALLEL. Currently one NBD connection opens one libcurl handle. This also implies one TCP connection to the web server. If you want to open multiple libcurl handles (and multiple TCP connections), the client must open multiple NBD connections, eg. using multi-conn. After this series, there is a pool of libcurl handles shared across all NBD connections. The pool defaults to 4 handles, but this can be changed using the connections=N parameter. Previously the plugin relied on nbdkit SERIALIZE_REQUESTS to ensure that a curl handle could not be used from multiple threads at the same time (https://curl.se/libcurl/c/threadsafe.html). After this change it is possible to use the PARALLEL thread model. This change is quite valuable because it means we can use filters like readahead and scan. Anyway, this all seems to work, but it actually reduces performance :-( In particular this simple test slows down quite substantially: time ./nbdkit -r -U - curl file:/var/tmp/fedora-36.img --run 'nbdcopy --no-extents -p "$uri" null:' (where /var/tmp/fedora-36.img is a 10G file). I've been looking at flamegraphs all morning and I can't really see what the problem is (except that lots more time is spent with libcurl calling sigaction?!?) I'm wondering if it might be a locality issue, since curl handles are now being scattered randomly across threads. (It might mean in the file: case that Linux kernel readahead is ineffective). I can't easily see a way to change the implementation to encourage handles to be reused by the same thread. Well, here we are ... Rich.
Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 1/6] curl: Rename curl_handle -> handle
Trivial refactoring to make future changes easier. --- plugins/curl/curldefs.h | 4 ++-- plugins/curl/curl.c | 18 +++++++++--------- plugins/curl/scripts.c | 14 +++++++------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 16b6071cc..3d0efb718 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -66,7 +66,7 @@ extern const char *user; extern const char *user_agent; /* The per-connection handle. */ -struct curl_handle { +struct handle { CURL *c; int readonly; bool accept_range; @@ -80,7 +80,7 @@ struct curl_handle { }; /* scripts.c */ -extern int do_scripts (struct curl_handle *h); +extern int do_scripts (struct handle *h); extern void scripts_unload (void); #endif /* NBDKIT_CURLDEFS_H */ diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 2f34d1dc1..9d7ba9d6d 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -452,7 +452,7 @@ static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); static void * curl_open (int readonly) { - struct curl_handle *h; + struct handle *h; CURLcode r; #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T curl_off_t o; @@ -729,7 +729,7 @@ debug_cb (CURL *handle, curl_infotype type, static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) { - struct curl_handle *h = opaque; + struct handle *h = opaque; size_t realsize = size * nmemb; const char *header = ptr; const char *end = header + realsize; @@ -763,7 +763,7 @@ header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) static void curl_close (void *handle) { - struct curl_handle *h = handle; + struct handle *h = handle; curl_easy_cleanup (h->c); if (h->headers_copy) @@ -777,7 +777,7 @@ curl_close (void *handle) static int64_t curl_get_size (void *handle) { - struct curl_handle *h = handle; + struct handle *h = handle; return h->exportsize; } @@ -789,7 +789,7 @@ curl_get_size (void *handle) static int curl_can_multi_conn (void *handle) { - struct curl_handle *h = handle; + struct handle *h = handle; return !! h->readonly; } @@ -806,7 +806,7 @@ curl_can_multi_conn (void *handle) static int curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) { - struct curl_handle *h = handle; + struct handle *h = handle; CURLcode r; char range[128]; @@ -846,7 +846,7 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) { - struct curl_handle *h = opaque; + struct handle *h = opaque; size_t orig_realsize = size * nmemb; size_t realsize = orig_realsize; @@ -870,7 +870,7 @@ write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) static int curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) { - struct curl_handle *h = handle; + struct handle *h = handle; CURLcode r; char range[128]; @@ -910,7 +910,7 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) { - struct curl_handle *h = opaque; + struct handle *h = opaque; size_t realsize = size * nmemb; assert (h->read_buf); diff --git a/plugins/curl/scripts.c b/plugins/curl/scripts.c index fefdc0374..892f7da28 100644 --- a/plugins/curl/scripts.c +++ b/plugins/curl/scripts.c @@ -80,17 +80,17 @@ scripts_unload (void) free (cookies_from_script); } -static int run_header_script (struct curl_handle *); -static int run_cookie_script (struct curl_handle *); +static int run_header_script (struct handle *); +static int run_cookie_script (struct handle *); static void error_from_tmpfile (const char *what, const char *tmpfile); /* This is called from any thread just before we make a curl request. * * Because the thread model is NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS - * we can be assured of exclusive access to curl_handle here. + * we can be assured of exclusive access to handle here. */ int -do_scripts (struct curl_handle *h) +do_scripts (struct handle *h) { time_t now; struct curl_slist *p; @@ -163,7 +163,7 @@ do_scripts (struct curl_handle *h) * header-script. */ static int -run_header_script (struct curl_handle *h) +run_header_script (struct handle *h) { int fd; char tmpfile[] = "/tmp/errorsXXXXXX"; @@ -243,7 +243,7 @@ run_header_script (struct curl_handle *h) * cookie-script. */ static int -run_cookie_script (struct curl_handle *h) +run_cookie_script (struct handle *h) { int fd; char tmpfile[] = "/tmp/errorsXXXXXX"; @@ -352,7 +352,7 @@ scripts_unload (void) } int -do_scripts (struct curl_handle *h) +do_scripts (struct handle *h) { if (!header_script && !cookie_script) return 0; -- 2.39.0
Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 2/6] curl: Split out the libcurl handle from the plugin handle
In a future commit we will create a pool of libcurl handles. In order to prepare for that, split out the libcurl handle and associated fields and buffers, from the plugin handle. The plugin handle (for the moment) contains a pointer to the libcurl handle so there is still a 1-1 relationship between them. This is mostly just refactoring/renaming, except for the callback and scripts functions. The callback functions (read_cb etc) only operate on libcurl handles, so pass the libcurl handle (not the plugin handle) as the opaque parameter. The scripts also only care about libcurl handles, so pass the libcurl handle to do_scripts. --- plugins/curl/curldefs.h | 11 +- plugins/curl/curl.c | 223 +++++++++++++++++++++------------------- plugins/curl/scripts.c | 34 +++--- 3 files changed, 142 insertions(+), 126 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 3d0efb718..fc377d1d3 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2014-2020 Red Hat Inc. + * Copyright (C) 2014-2023 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -67,8 +67,13 @@ extern const char *user_agent; /* The per-connection handle. */ struct handle { - CURL *c; int readonly; + struct curl_handle *ch; +}; + +/* The libcurl handle and some associated fields and buffers. */ +struct curl_handle { + CURL *c; bool accept_range; int64_t exportsize; char errbuf[CURL_ERROR_SIZE]; @@ -80,7 +85,7 @@ struct handle { }; /* scripts.c */ -extern int do_scripts (struct handle *h); +extern int do_scripts (struct curl_handle *ch); extern void scripts_unload (void); #endif /* NBDKIT_CURLDEFS_H */ diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 9d7ba9d6d..2c2620bfe 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2014-2020 Red Hat Inc. + * Copyright (C) 2014-2023 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -436,10 +436,10 @@ curl_config_complete (void) "user-agent=<USER-AGENT> Send user-agent header for HTTP/HTTPS." /* Translate CURLcode to nbdkit_error. */ -#define display_curl_error(h, r, fs, ...) \ +#define display_curl_error(ch, r, fs, ...) \ do { \ nbdkit_error ((fs ": %s: %s"), ## __VA_ARGS__, \ - curl_easy_strerror ((r)), (h)->errbuf); \ + curl_easy_strerror ((r)), (ch)->errbuf); \ } while (0) static int debug_cb (CURL *handle, curl_infotype type, @@ -467,8 +467,15 @@ curl_open (int readonly) } h->readonly = readonly; - h->c = curl_easy_init (); - if (h->c == NULL) { + h->ch = calloc (1, sizeof *h->ch); + if (h->ch == NULL) { + nbdkit_error ("calloc: %m"); + free (h); + return NULL; + } + + h->ch->c = curl_easy_init (); + if (h->ch->c == NULL) { nbdkit_error ("curl_easy_init: failed: %m"); goto err; } @@ -477,29 +484,29 @@ curl_open (int readonly) /* NB: Constants must be explicitly long because the parameter is * varargs. */ - curl_easy_setopt (h->c, CURLOPT_VERBOSE, 1L); - curl_easy_setopt (h->c, CURLOPT_DEBUGFUNCTION, debug_cb); + curl_easy_setopt (h->ch->c, CURLOPT_VERBOSE, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_DEBUGFUNCTION, debug_cb); } - curl_easy_setopt (h->c, CURLOPT_ERRORBUFFER, h->errbuf); + curl_easy_setopt (h->ch->c, CURLOPT_ERRORBUFFER, h->ch->errbuf); r = CURLE_OK; if (unix_socket_path) { #if HAVE_CURLOPT_UNIX_SOCKET_PATH - r = curl_easy_setopt (h->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); + r = curl_easy_setopt (h->ch->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); #else r = CURLE_UNKNOWN_OPTION; #endif } if (r != CURLE_OK) { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); + display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); goto err; } /* Set the URL. */ - r = curl_easy_setopt (h->c, CURLOPT_URL, url); + r = curl_easy_setopt (h->ch->c, CURLOPT_URL, url); if (r != CURLE_OK) { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); + display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); goto err; } @@ -508,79 +515,79 @@ curl_open (int readonly) * NB: Both here and below constants must be explicitly long because * the parameter is varargs. */ - curl_easy_setopt (h->c, CURLOPT_AUTOREFERER, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_AUTOREFERER, 1L); if (followlocation) - curl_easy_setopt (h->c, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt (h->c, CURLOPT_FAILONERROR, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_FAILONERROR, 1L); /* Options. */ if (cainfo) { if (strlen (cainfo) == 0) - curl_easy_setopt (h->c, CURLOPT_CAINFO, NULL); + curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, NULL); else - curl_easy_setopt (h->c, CURLOPT_CAINFO, cainfo); + curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, cainfo); } if (capath) - curl_easy_setopt (h->c, CURLOPT_CAPATH, capath); + curl_easy_setopt (h->ch->c, CURLOPT_CAPATH, capath); if (cookie) - curl_easy_setopt (h->c, CURLOPT_COOKIE, cookie); + curl_easy_setopt (h->ch->c, CURLOPT_COOKIE, cookie); if (cookiefile) - curl_easy_setopt (h->c, CURLOPT_COOKIEFILE, cookiefile); + curl_easy_setopt (h->ch->c, CURLOPT_COOKIEFILE, cookiefile); if (cookiejar) - curl_easy_setopt (h->c, CURLOPT_COOKIEJAR, cookiejar); + curl_easy_setopt (h->ch->c, CURLOPT_COOKIEJAR, cookiejar); if (headers) - curl_easy_setopt (h->c, CURLOPT_HTTPHEADER, headers); + curl_easy_setopt (h->ch->c, CURLOPT_HTTPHEADER, headers); if (password) - curl_easy_setopt (h->c, CURLOPT_PASSWORD, password); + curl_easy_setopt (h->ch->c, CURLOPT_PASSWORD, password); #ifndef HAVE_CURLOPT_PROTOCOLS_STR if (protocols != CURLPROTO_ALL) { - curl_easy_setopt (h->c, CURLOPT_PROTOCOLS, protocols); - curl_easy_setopt (h->c, CURLOPT_REDIR_PROTOCOLS, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS, protocols); } #else /* HAVE_CURLOPT_PROTOCOLS_STR (new in 7.85.0) */ if (protocols) { - curl_easy_setopt (h->c, CURLOPT_PROTOCOLS_STR, protocols); - curl_easy_setopt (h->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS_STR, protocols); + curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); } #endif /* HAVE_CURLOPT_PROTOCOLS_STR */ if (proxy) - curl_easy_setopt (h->c, CURLOPT_PROXY, proxy); + curl_easy_setopt (h->ch->c, CURLOPT_PROXY, proxy); if (proxy_password) - curl_easy_setopt (h->c, CURLOPT_PROXYPASSWORD, proxy_password); + curl_easy_setopt (h->ch->c, CURLOPT_PROXYPASSWORD, proxy_password); if (proxy_user) - curl_easy_setopt (h->c, CURLOPT_PROXYUSERNAME, proxy_user); + curl_easy_setopt (h->ch->c, CURLOPT_PROXYUSERNAME, proxy_user); if (!sslverify) { - curl_easy_setopt (h->c, CURLOPT_SSL_VERIFYPEER, 0L); - curl_easy_setopt (h->c, CURLOPT_SSL_VERIFYHOST, 0L); + curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYHOST, 0L); } if (ssl_version) { if (strcmp (ssl_version, "tlsv1") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); else if (strcmp (ssl_version, "sslv2") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); else if (strcmp (ssl_version, "sslv3") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); else if (strcmp (ssl_version, "tlsv1.0") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); else if (strcmp (ssl_version, "tlsv1.1") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); else if (strcmp (ssl_version, "tlsv1.2") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); else if (strcmp (ssl_version, "tlsv1.3") == 0) - curl_easy_setopt (h->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); + curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); else { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", + display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", ssl_version); goto err; } } if (ssl_cipher_list) - curl_easy_setopt (h->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); + curl_easy_setopt (h->ch->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); if (tls13_ciphers) { #if (LIBCURL_VERSION_MAJOR > 7) || \ (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 61) - curl_easy_setopt (h->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); + curl_easy_setopt (h->ch->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); #else /* This is not available before curl-7.61 */ nbdkit_error ("tls13-ciphers is not supported in this build of " @@ -589,18 +596,18 @@ curl_open (int readonly) #endif } if (tcp_keepalive) - curl_easy_setopt (h->c, CURLOPT_TCP_KEEPALIVE, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_TCP_KEEPALIVE, 1L); if (!tcp_nodelay) - curl_easy_setopt (h->c, CURLOPT_TCP_NODELAY, 0L); + curl_easy_setopt (h->ch->c, CURLOPT_TCP_NODELAY, 0L); if (timeout > 0) /* NB: The cast is required here because the parameter is varargs * treated as long, and not type safe. */ - curl_easy_setopt (h->c, CURLOPT_TIMEOUT, (long) timeout); + curl_easy_setopt (h->ch->c, CURLOPT_TIMEOUT, (long) timeout); if (user) - curl_easy_setopt (h->c, CURLOPT_USERNAME, user); + curl_easy_setopt (h->ch->c, CURLOPT_USERNAME, user); if (user_agent) - curl_easy_setopt (h->c, CURLOPT_USERAGENT, user_agent); + curl_easy_setopt (h->ch->c, CURLOPT_USERAGENT, user_agent); /* Get the file size and also whether the remote HTTP server * supports byte ranges. @@ -608,23 +615,24 @@ curl_open (int readonly) * We must run the scripts if necessary and set headers in the * handle. */ - if (do_scripts (h) == -1) goto err; - h->accept_range = false; - curl_easy_setopt (h->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ - curl_easy_setopt (h->c, CURLOPT_HEADERFUNCTION, header_cb); - curl_easy_setopt (h->c, CURLOPT_HEADERDATA, h); - r = curl_easy_perform (h->c); + if (do_scripts (h->ch) == -1) goto err; + h->ch->accept_range = false; + curl_easy_setopt (h->ch->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ + curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, h->ch); + r = curl_easy_perform (h->ch->c); if (r != CURLE_OK) { - display_curl_error (h, r, + display_curl_error (h->ch, r, "problem doing HEAD request to fetch size of URL [%s]", url); goto err; } #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); + r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); if (r != CURLE_OK) { - display_curl_error (h, r, "could not get length of remote file [%s]", url); + display_curl_error (h->ch, r, + "could not get length of remote file [%s]", url); goto err; } @@ -634,11 +642,12 @@ curl_open (int readonly) goto err; } - h->exportsize = o; + h->ch->exportsize = o; #else - r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); + r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); if (r != CURLE_OK) { - display_curl_error (h, r, "could not get length of remote file [%s]", url); + display_curl_error (h->ch, r, + "could not get length of remote file [%s]", url); goto err; } @@ -648,13 +657,13 @@ curl_open (int readonly) goto err; } - h->exportsize = d; + h->ch->exportsize = d; #endif - nbdkit_debug ("content length: %" PRIi64, h->exportsize); + nbdkit_debug ("content length: %" PRIi64, h->ch->exportsize); if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { - if (!h->accept_range) { + if (!h->ch->accept_range) { nbdkit_error ("server does not support 'range' (byte range) requests"); goto err; } @@ -663,20 +672,21 @@ curl_open (int readonly) } /* Get set up for reading and writing. */ - curl_easy_setopt (h->c, CURLOPT_HEADERFUNCTION, NULL); - curl_easy_setopt (h->c, CURLOPT_HEADERDATA, NULL); - curl_easy_setopt (h->c, CURLOPT_WRITEFUNCTION, write_cb); - curl_easy_setopt (h->c, CURLOPT_WRITEDATA, h); + curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, NULL); + curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, NULL); + curl_easy_setopt (h->ch->c, CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt (h->ch->c, CURLOPT_WRITEDATA, h->ch); if (!readonly) { - curl_easy_setopt (h->c, CURLOPT_READFUNCTION, read_cb); - curl_easy_setopt (h->c, CURLOPT_READDATA, h); + curl_easy_setopt (h->ch->c, CURLOPT_READFUNCTION, read_cb); + curl_easy_setopt (h->ch->c, CURLOPT_READDATA, h->ch); } return h; err: - if (h->c) - curl_easy_cleanup (h->c); + if (h->ch->c) + curl_easy_cleanup (h->ch->c); + free (h->ch); free (h); return NULL; } @@ -729,7 +739,7 @@ debug_cb (CURL *handle, curl_infotype type, static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) { - struct handle *h = opaque; + struct curl_handle *ch = opaque; size_t realsize = size * nmemb; const char *header = ptr; const char *end = header + realsize; @@ -752,7 +762,7 @@ header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) p++; if (p == end || !*p) - h->accept_range = true; + ch->accept_range = true; } } @@ -765,9 +775,10 @@ curl_close (void *handle) { struct handle *h = handle; - curl_easy_cleanup (h->c); - if (h->headers_copy) - curl_slist_free_all (h->headers_copy); + curl_easy_cleanup (h->ch->c); + if (h->ch->headers_copy) + curl_slist_free_all (h->ch->headers_copy); + free (h->ch); free (h); } @@ -779,7 +790,7 @@ curl_get_size (void *handle) { struct handle *h = handle; - return h->exportsize; + return h->ch->exportsize; } /* Multi-conn is safe for read-only connections, but HTTP does not @@ -811,25 +822,25 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) char range[128]; /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h) == -1) return -1; + if (do_scripts (h->ch) == -1) return -1; /* Tell the write_cb where we want the data to be written. write_cb * will update this if the data comes in multiple sections. */ - h->write_buf = buf; - h->write_count = count; + h->ch->write_buf = buf; + h->ch->write_count = count; - curl_easy_setopt (h->c, CURLOPT_HTTPGET, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_HTTPGET, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->c, CURLOPT_RANGE, range); + curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->c); + r = curl_easy_perform (h->ch->c); if (r != CURLE_OK) { - display_curl_error (h, r, "pread: curl_easy_perform"); + display_curl_error (h->ch, r, "pread: curl_easy_perform"); return -1; } @@ -838,7 +849,7 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->write_count == 0); + assert (h->ch->write_count == 0); return 0; } @@ -846,22 +857,22 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) { - struct handle *h = opaque; + struct curl_handle *ch = opaque; size_t orig_realsize = size * nmemb; size_t realsize = orig_realsize; - assert (h->write_buf); + assert (ch->write_buf); /* Don't read more than the requested amount of data, even if the * server or libcurl sends more. */ - if (realsize > h->write_count) - realsize = h->write_count; + if (realsize > ch->write_count) + realsize = ch->write_count; - memcpy (h->write_buf, ptr, realsize); + memcpy (ch->write_buf, ptr, realsize); - h->write_count -= realsize; - h->write_buf += realsize; + ch->write_count -= realsize; + ch->write_buf += realsize; return orig_realsize; } @@ -875,25 +886,25 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) char range[128]; /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h) == -1) return -1; + if (do_scripts (h->ch) == -1) return -1; /* Tell the read_cb where we want the data to be read from. read_cb * will update this if the data comes in multiple sections. */ - h->read_buf = buf; - h->read_count = count; + h->ch->read_buf = buf; + h->ch->read_count = count; - curl_easy_setopt (h->c, CURLOPT_UPLOAD, 1L); + curl_easy_setopt (h->ch->c, CURLOPT_UPLOAD, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->c, CURLOPT_RANGE, range); + curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->c); + r = curl_easy_perform (h->ch->c); if (r != CURLE_OK) { - display_curl_error (h, r, "pwrite: curl_easy_perform"); + display_curl_error (h->ch, r, "pwrite: curl_easy_perform"); return -1; } @@ -902,7 +913,7 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->read_count == 0); + assert (h->ch->read_count == 0); return 0; } @@ -910,17 +921,17 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) { - struct handle *h = opaque; + struct curl_handle *ch = opaque; size_t realsize = size * nmemb; - assert (h->read_buf); - if (realsize > h->read_count) - realsize = h->read_count; + assert (ch->read_buf); + if (realsize > ch->read_count) + realsize = ch->read_count; - memcpy (ptr, h->read_buf, realsize); + memcpy (ptr, ch->read_buf, realsize); - h->read_count -= realsize; - h->read_buf += realsize; + ch->read_count -= realsize; + ch->read_buf += realsize; return realsize; } diff --git a/plugins/curl/scripts.c b/plugins/curl/scripts.c index 892f7da28..9b674fc83 100644 --- a/plugins/curl/scripts.c +++ b/plugins/curl/scripts.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2014-2020 Red Hat Inc. + * Copyright (C) 2014-2023 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -80,8 +80,8 @@ scripts_unload (void) free (cookies_from_script); } -static int run_header_script (struct handle *); -static int run_cookie_script (struct handle *); +static int run_header_script (struct curl_handle *); +static int run_cookie_script (struct curl_handle *); static void error_from_tmpfile (const char *what, const char *tmpfile); /* This is called from any thread just before we make a curl request. @@ -90,7 +90,7 @@ static void error_from_tmpfile (const char *what, const char *tmpfile); * we can be assured of exclusive access to handle here. */ int -do_scripts (struct handle *h) +do_scripts (struct curl_handle *ch) { time_t now; struct curl_slist *p; @@ -108,7 +108,7 @@ do_scripts (struct handle *h) time (&now); if (!header_script_has_run || (header_script_renew > 0 && now - header_last >= header_script_renew)) { - if (run_header_script (h) == -1) + if (run_header_script (ch) == -1) return -1; header_last = now; header_script_has_run = true; @@ -120,7 +120,7 @@ do_scripts (struct handle *h) time (&now); if (!cookie_script_has_run || (cookie_script_renew > 0 && now - cookie_last >= cookie_script_renew)) { - if (run_cookie_script (h) == -1) + if (run_cookie_script (ch) == -1) return -1; cookie_last = now; cookie_script_has_run = true; @@ -133,28 +133,28 @@ do_scripts (struct handle *h) * because unfortunately curl doesn't take a copy. Since we don't * know which other threads might be using it, we must make a copy * of the global list (headers_from_script) per handle - * (h->headers_copy). For CURLOPT_COOKIE, curl internally takes a + * (ch->headers_copy). For CURLOPT_COOKIE, curl internally takes a * copy so we don't need to do this. */ - if (h->headers_copy) { - curl_easy_setopt (h->c, CURLOPT_HTTPHEADER, NULL); - curl_slist_free_all (h->headers_copy); - h->headers_copy = NULL; + if (ch->headers_copy) { + curl_easy_setopt (ch->c, CURLOPT_HTTPHEADER, NULL); + curl_slist_free_all (ch->headers_copy); + ch->headers_copy = NULL; } for (p = headers_from_script; p != NULL; p = p->next) { if (curl_debug_scripts) nbdkit_debug ("header-script: setting header %s", p->data); - h->headers_copy = curl_slist_append (h->headers_copy, p->data); - if (h->headers_copy == NULL) { + ch->headers_copy = curl_slist_append (ch->headers_copy, p->data); + if (ch->headers_copy == NULL) { nbdkit_error ("curl_slist_append: %m"); return -1; } } - curl_easy_setopt (h->c, CURLOPT_HTTPHEADER, h->headers_copy); + curl_easy_setopt (ch->c, CURLOPT_HTTPHEADER, ch->headers_copy); if (curl_debug_scripts && cookies_from_script) nbdkit_debug ("cookie-script: setting cookie %s", cookies_from_script); - curl_easy_setopt (h->c, CURLOPT_COOKIE, cookies_from_script); + curl_easy_setopt (ch->c, CURLOPT_COOKIE, cookies_from_script); return 0; } @@ -163,7 +163,7 @@ do_scripts (struct handle *h) * header-script. */ static int -run_header_script (struct handle *h) +run_header_script (struct curl_handle *ch) { int fd; char tmpfile[] = "/tmp/errorsXXXXXX"; @@ -243,7 +243,7 @@ run_header_script (struct handle *h) * cookie-script. */ static int -run_cookie_script (struct handle *h) +run_cookie_script (struct curl_handle *ch) { int fd; char tmpfile[] = "/tmp/errorsXXXXXX"; -- 2.39.0
Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 3/6] curl: Fix declarations of globals in "curldefs.h"
Tidy up previous work on this plugin so that every global is declared in "curldefs.h". Move the HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T macro to this header file too. --- plugins/curl/curldefs.h | 16 ++++++++++++++++ plugins/curl/curl.c | 11 +---------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index fc377d1d3..36f5fd5bb 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -33,8 +33,19 @@ #ifndef NBDKIT_CURLDEFS_H #define NBDKIT_CURLDEFS_H +#include <stdbool.h> + #include "windows-compat.h" +/* Macro CURL_AT_LEAST_VERSION was added in 2015 (Curl 7.43) so if the + * macro isn't present then Curl is very old. + */ +#ifdef CURL_AT_LEAST_VERSION +#if CURL_AT_LEAST_VERSION(7, 55, 0) +#define HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T +#endif +#endif + extern const char *url; extern const char *cainfo; @@ -58,6 +69,9 @@ extern const char *proxy; extern char *proxy_password; extern const char *proxy_user; extern bool sslverify; +extern const char *ssl_cipher_list; +extern const char *ssl_version; +extern const char *tls13_ciphers; extern bool tcp_keepalive; extern bool tcp_nodelay; extern uint32_t timeout; @@ -65,6 +79,8 @@ extern const char *unix_socket_path; extern const char *user; extern const char *user_agent; +extern int curl_debug_verbose; + /* The per-connection handle. */ struct handle { int readonly; diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 2c2620bfe..86d953432 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -54,15 +54,6 @@ #include "curldefs.h" -/* Macro CURL_AT_LEAST_VERSION was added in 2015 (Curl 7.43) so if the - * macro isn't present then Curl is very old. - */ -#ifdef CURL_AT_LEAST_VERSION -#if CURL_AT_LEAST_VERSION(7, 55, 0) -#define HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T -#endif -#endif - /* Plugin configuration. */ const char *url = NULL; /* required */ @@ -87,8 +78,8 @@ const char *proxy = NULL; char *proxy_password = NULL; const char *proxy_user = NULL; bool sslverify = true; -const char *ssl_version = NULL; const char *ssl_cipher_list = NULL; +const char *ssl_version = NULL; const char *tls13_ciphers = NULL; bool tcp_keepalive = false; bool tcp_nodelay = true; -- 2.39.0
Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 4/6] curl: Introduce concept of getting/putting handles from a common pool
Introduce the concept of a pool of curl handles from which the main code has to take a handle (get_handle()), and later return it to the pool (put_handle()). When a handle is taken from the pool it can be used exclusively by the main code until it is returned. The "pool" only has at most one handle, so this is actually a pessimization from the situation before because all NBD connections share a single curl handle. In a future commit we will add support for multiple handles in the pool. A note about the change to tests/test-retry-request-mirror.c: This test depended implicitly on a new NBD connection opening a new libcurl handle (this no longer happens), and when opening a new libcurl handle that causes a HEAD request to be made. We have to adjust the test to remove this assumption. --- plugins/curl/Makefile.am | 5 +- plugins/curl/curldefs.h | 5 + plugins/curl/curl.c | 430 +++----------------------- plugins/curl/pool.c | 482 ++++++++++++++++++++++++++++++ tests/test-retry-request-mirror.c | 9 - 5 files changed, 534 insertions(+), 397 deletions(-) diff --git a/plugins/curl/Makefile.am b/plugins/curl/Makefile.am index f08923e77..d6b97a821 100644 --- a/plugins/curl/Makefile.am +++ b/plugins/curl/Makefile.am @@ -1,5 +1,5 @@ # nbdkit -# Copyright (C) 2014-2022 Red Hat Inc. +# Copyright (C) 2014-2023 Red Hat Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are @@ -39,8 +39,9 @@ plugin_LTLIBRARIES = nbdkit-curl-plugin.la nbdkit_curl_plugin_la_SOURCES = \ curldefs.h \ - scripts.c \ curl.c \ + pool.c \ + scripts.c \ $(top_srcdir)/include/nbdkit-plugin.h \ $(NULL) diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index 36f5fd5bb..fabd1e537 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -100,6 +100,11 @@ struct curl_handle { struct curl_slist *headers_copy; }; +/* pool.c */ +extern struct curl_handle *get_handle (void); +extern void put_handle (struct curl_handle *ch); +extern void free_all_handles (void); + /* scripts.c */ extern int do_scripts (struct curl_handle *ch); extern void scripts_unload (void); diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 86d953432..105de29c6 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -48,8 +48,6 @@ #include <nbdkit-plugin.h> -#include "ascii-ctype.h" -#include "ascii-string.h" #include "cleanup.h" #include "curldefs.h" @@ -112,6 +110,7 @@ curl_unload (void) free (password); free (proxy_password); scripts_unload (); + free_all_handles (); curl_global_cleanup (); } @@ -433,23 +432,11 @@ curl_config_complete (void) curl_easy_strerror ((r)), (ch)->errbuf); \ } while (0) -static int debug_cb (CURL *handle, curl_infotype type, - const char *data, size_t size, void *); -static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); -static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); -static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); - /* Create the per-connection handle. */ static void * curl_open (int readonly) { struct handle *h; - CURLcode r; -#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - curl_off_t o; -#else - double d; -#endif h = calloc (1, sizeof *h); if (h == NULL) { @@ -458,306 +445,7 @@ curl_open (int readonly) } h->readonly = readonly; - h->ch = calloc (1, sizeof *h->ch); - if (h->ch == NULL) { - nbdkit_error ("calloc: %m"); - free (h); - return NULL; - } - - h->ch->c = curl_easy_init (); - if (h->ch->c == NULL) { - nbdkit_error ("curl_easy_init: failed: %m"); - goto err; - } - - if (curl_debug_verbose) { - /* NB: Constants must be explicitly long because the parameter is - * varargs. - */ - curl_easy_setopt (h->ch->c, CURLOPT_VERBOSE, 1L); - curl_easy_setopt (h->ch->c, CURLOPT_DEBUGFUNCTION, debug_cb); - } - - curl_easy_setopt (h->ch->c, CURLOPT_ERRORBUFFER, h->ch->errbuf); - - r = CURLE_OK; - if (unix_socket_path) { -#if HAVE_CURLOPT_UNIX_SOCKET_PATH - r = curl_easy_setopt (h->ch->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); -#else - r = CURLE_UNKNOWN_OPTION; -#endif - } - if (r != CURLE_OK) { - display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); - goto err; - } - - /* Set the URL. */ - r = curl_easy_setopt (h->ch->c, CURLOPT_URL, url); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); - goto err; - } - - /* Various options we always set. - * - * NB: Both here and below constants must be explicitly long because - * the parameter is varargs. - */ - curl_easy_setopt (h->ch->c, CURLOPT_AUTOREFERER, 1L); - if (followlocation) - curl_easy_setopt (h->ch->c, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt (h->ch->c, CURLOPT_FAILONERROR, 1L); - - /* Options. */ - if (cainfo) { - if (strlen (cainfo) == 0) - curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, NULL); - else - curl_easy_setopt (h->ch->c, CURLOPT_CAINFO, cainfo); - } - if (capath) - curl_easy_setopt (h->ch->c, CURLOPT_CAPATH, capath); - if (cookie) - curl_easy_setopt (h->ch->c, CURLOPT_COOKIE, cookie); - if (cookiefile) - curl_easy_setopt (h->ch->c, CURLOPT_COOKIEFILE, cookiefile); - if (cookiejar) - curl_easy_setopt (h->ch->c, CURLOPT_COOKIEJAR, cookiejar); - if (headers) - curl_easy_setopt (h->ch->c, CURLOPT_HTTPHEADER, headers); - if (password) - curl_easy_setopt (h->ch->c, CURLOPT_PASSWORD, password); -#ifndef HAVE_CURLOPT_PROTOCOLS_STR - if (protocols != CURLPROTO_ALL) { - curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS, protocols); - curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS, protocols); - } -#else /* HAVE_CURLOPT_PROTOCOLS_STR (new in 7.85.0) */ - if (protocols) { - curl_easy_setopt (h->ch->c, CURLOPT_PROTOCOLS_STR, protocols); - curl_easy_setopt (h->ch->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); - } -#endif /* HAVE_CURLOPT_PROTOCOLS_STR */ - if (proxy) - curl_easy_setopt (h->ch->c, CURLOPT_PROXY, proxy); - if (proxy_password) - curl_easy_setopt (h->ch->c, CURLOPT_PROXYPASSWORD, proxy_password); - if (proxy_user) - curl_easy_setopt (h->ch->c, CURLOPT_PROXYUSERNAME, proxy_user); - if (!sslverify) { - curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYPEER, 0L); - curl_easy_setopt (h->ch->c, CURLOPT_SSL_VERIFYHOST, 0L); - } - if (ssl_version) { - if (strcmp (ssl_version, "tlsv1") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); - else if (strcmp (ssl_version, "sslv2") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); - else if (strcmp (ssl_version, "sslv3") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); - else if (strcmp (ssl_version, "tlsv1.0") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); - else if (strcmp (ssl_version, "tlsv1.1") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); - else if (strcmp (ssl_version, "tlsv1.2") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); - else if (strcmp (ssl_version, "tlsv1.3") == 0) - curl_easy_setopt (h->ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); - else { - display_curl_error (h->ch, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", - ssl_version); - goto err; - } - - } - if (ssl_cipher_list) - curl_easy_setopt (h->ch->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); - if (tls13_ciphers) { -#if (LIBCURL_VERSION_MAJOR > 7) || \ - (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 61) - curl_easy_setopt (h->ch->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); -#else - /* This is not available before curl-7.61 */ - nbdkit_error ("tls13-ciphers is not supported in this build of " - "nbdkit-curl-plugin"); - goto err; -#endif - } - if (tcp_keepalive) - curl_easy_setopt (h->ch->c, CURLOPT_TCP_KEEPALIVE, 1L); - if (!tcp_nodelay) - curl_easy_setopt (h->ch->c, CURLOPT_TCP_NODELAY, 0L); - if (timeout > 0) - /* NB: The cast is required here because the parameter is varargs - * treated as long, and not type safe. - */ - curl_easy_setopt (h->ch->c, CURLOPT_TIMEOUT, (long) timeout); - if (user) - curl_easy_setopt (h->ch->c, CURLOPT_USERNAME, user); - if (user_agent) - curl_easy_setopt (h->ch->c, CURLOPT_USERAGENT, user_agent); - - /* Get the file size and also whether the remote HTTP server - * supports byte ranges. - * - * We must run the scripts if necessary and set headers in the - * handle. - */ - if (do_scripts (h->ch) == -1) goto err; - h->ch->accept_range = false; - curl_easy_setopt (h->ch->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ - curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, header_cb); - curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, h->ch); - r = curl_easy_perform (h->ch->c); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, - "problem doing HEAD request to fetch size of URL [%s]", - url); - goto err; - } - -#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, - "could not get length of remote file [%s]", url); - goto err; - } - - if (o == -1) { - nbdkit_error ("could not get length of remote file [%s], " - "is the URL correct?", url); - goto err; - } - - h->ch->exportsize = o; -#else - r = curl_easy_getinfo (h->ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); - if (r != CURLE_OK) { - display_curl_error (h->ch, r, - "could not get length of remote file [%s]", url); - goto err; - } - - if (d == -1) { - nbdkit_error ("could not get length of remote file [%s], " - "is the URL correct?", url); - goto err; - } - - h->ch->exportsize = d; -#endif - nbdkit_debug ("content length: %" PRIi64, h->ch->exportsize); - - if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || - ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { - if (!h->ch->accept_range) { - nbdkit_error ("server does not support 'range' (byte range) requests"); - goto err; - } - - nbdkit_debug ("accept range supported (for HTTP/HTTPS)"); - } - - /* Get set up for reading and writing. */ - curl_easy_setopt (h->ch->c, CURLOPT_HEADERFUNCTION, NULL); - curl_easy_setopt (h->ch->c, CURLOPT_HEADERDATA, NULL); - curl_easy_setopt (h->ch->c, CURLOPT_WRITEFUNCTION, write_cb); - curl_easy_setopt (h->ch->c, CURLOPT_WRITEDATA, h->ch); - if (!readonly) { - curl_easy_setopt (h->ch->c, CURLOPT_READFUNCTION, read_cb); - curl_easy_setopt (h->ch->c, CURLOPT_READDATA, h->ch); - } - return h; - - err: - if (h->ch->c) - curl_easy_cleanup (h->ch->c); - free (h->ch); - free (h); - return NULL; -} - -/* When using CURLOPT_VERBOSE, this callback is used to redirect - * messages to nbdkit_debug (instead of stderr). - */ -static int -debug_cb (CURL *handle, curl_infotype type, - const char *data, size_t size, void *opaque) -{ - size_t origsize = size; - CLEANUP_FREE char *str; - - /* The data parameter passed is NOT \0-terminated, but also it may - * have \n or \r\n line endings. The only sane way to deal with - * this is to copy the string. (The data strings may also be - * multi-line, but we don't deal with that here). - */ - str = malloc (size + 1); - if (str == NULL) - goto out; - memcpy (str, data, size); - str[size] = '\0'; - - while (size > 0 && (str[size-1] == '\n' || str[size-1] == '\r')) { - str[size-1] = '\0'; - size--; - } - - switch (type) { - case CURLINFO_TEXT: - nbdkit_debug ("%s", str); - break; - case CURLINFO_HEADER_IN: - nbdkit_debug ("S: %s", str); - break; - case CURLINFO_HEADER_OUT: - nbdkit_debug ("C: %s", str); - break; - default: - /* Assume everything else is binary data that we cannot print. */ - nbdkit_debug ("<data with size=%zu>", origsize); - } - - out: - return 0; -} - -static size_t -header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t realsize = size * nmemb; - const char *header = ptr; - const char *end = header + realsize; - const char *accept_ranges = "accept-ranges:"; - const char *bytes = "bytes"; - - if (realsize >= strlen (accept_ranges) && - ascii_strncasecmp (header, accept_ranges, strlen (accept_ranges)) == 0) { - const char *p = strchr (header, ':') + 1; - - /* Skip whitespace between the header name and value. */ - while (p < end && *p && ascii_isspace (*p)) - p++; - - if (end - p >= strlen (bytes) - && strncmp (p, bytes, strlen (bytes)) == 0) { - /* Check that there is nothing but whitespace after the value. */ - p += strlen (bytes); - while (p < end && *p && ascii_isspace (*p)) - p++; - - if (p == end || !*p) - ch->accept_range = true; - } - } - - return realsize; } /* Free up the per-connection handle. */ @@ -766,22 +454,35 @@ curl_close (void *handle) { struct handle *h = handle; - curl_easy_cleanup (h->ch->c); - if (h->ch->headers_copy) - curl_slist_free_all (h->ch->headers_copy); - free (h->ch); free (h); } #define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS +/* Calls get_handle() ... put_handle() to get a handle for the length + * of the current scope. + */ +#define GET_HANDLE_FOR_CURRENT_SCOPE(ch) \ + CLEANUP_PUT_HANDLE struct curl_handle *ch = get_handle (); +#define CLEANUP_PUT_HANDLE __attribute__((cleanup (cleanup_put_handle))) +static void +cleanup_put_handle (void *chp) +{ + struct curl_handle *ch = * (struct curl_handle **) chp; + + if (ch != NULL) + put_handle (ch); +} + /* Get the file size. */ static int64_t curl_get_size (void *handle) { - struct handle *h = handle; + GET_HANDLE_FOR_CURRENT_SCOPE (ch); + if (ch == NULL) + return -1; - return h->ch->exportsize; + return ch->exportsize; } /* Multi-conn is safe for read-only connections, but HTTP does not @@ -796,42 +497,37 @@ curl_can_multi_conn (void *handle) return !! h->readonly; } -/* NB: The terminology used by libcurl is confusing! - * - * WRITEFUNCTION / write_cb is used when reading from the remote server - * READFUNCTION / read_cb is used when writing to the remote server. - * - * We use the same terminology as libcurl here. - */ - /* Read data from the remote server. */ static int curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) { - struct handle *h = handle; CURLcode r; char range[128]; + GET_HANDLE_FOR_CURRENT_SCOPE (ch); + if (ch == NULL) + return -1; + /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h->ch) == -1) return -1; + if (do_scripts (ch) == -1) return -1; /* Tell the write_cb where we want the data to be written. write_cb * will update this if the data comes in multiple sections. */ - h->ch->write_buf = buf; - h->ch->write_count = count; + ch->write_buf = buf; + ch->write_count = count; - curl_easy_setopt (h->ch->c, CURLOPT_HTTPGET, 1L); + curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); + curl_easy_setopt (ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->ch->c); + r = curl_easy_perform (ch->c); if (r != CURLE_OK) { - display_curl_error (h->ch, r, "pread: curl_easy_perform"); + display_curl_error (ch, r, "pread: curl_easy_perform"); return -1; } @@ -840,62 +536,42 @@ curl_pread (void *handle, void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->ch->write_count == 0); + assert (ch->write_count == 0); return 0; } -static size_t -write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t orig_realsize = size * nmemb; - size_t realsize = orig_realsize; - - assert (ch->write_buf); - - /* Don't read more than the requested amount of data, even if the - * server or libcurl sends more. - */ - if (realsize > ch->write_count) - realsize = ch->write_count; - - memcpy (ch->write_buf, ptr, realsize); - - ch->write_count -= realsize; - ch->write_buf += realsize; - - return orig_realsize; -} - /* Write data to the remote server. */ static int curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) { - struct handle *h = handle; CURLcode r; char range[128]; + GET_HANDLE_FOR_CURRENT_SCOPE (ch); + if (ch == NULL) + return -1; + /* Run the scripts if necessary and set headers in the handle. */ - if (do_scripts (h->ch) == -1) return -1; + if (do_scripts (ch) == -1) return -1; /* Tell the read_cb where we want the data to be read from. read_cb * will update this if the data comes in multiple sections. */ - h->ch->read_buf = buf; - h->ch->read_count = count; + ch->read_buf = buf; + ch->read_count = count; - curl_easy_setopt (h->ch->c, CURLOPT_UPLOAD, 1L); + curl_easy_setopt (ch->c, CURLOPT_UPLOAD, 1L); /* Make an HTTP range request. */ snprintf (range, sizeof range, "%" PRIu64 "-%" PRIu64, offset, offset + count); - curl_easy_setopt (h->ch->c, CURLOPT_RANGE, range); + curl_easy_setopt (ch->c, CURLOPT_RANGE, range); /* The assumption here is that curl will look after timeouts. */ - r = curl_easy_perform (h->ch->c); + r = curl_easy_perform (ch->c); if (r != CURLE_OK) { - display_curl_error (h->ch, r, "pwrite: curl_easy_perform"); + display_curl_error (ch, r, "pwrite: curl_easy_perform"); return -1; } @@ -904,29 +580,11 @@ curl_pwrite (void *handle, const void *buf, uint32_t count, uint64_t offset) */ /* As far as I understand the cURL API, this should never happen. */ - assert (h->ch->read_count == 0); + assert (ch->read_count == 0); return 0; } -static size_t -read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) -{ - struct curl_handle *ch = opaque; - size_t realsize = size * nmemb; - - assert (ch->read_buf); - if (realsize > ch->read_count) - realsize = ch->read_count; - - memcpy (ptr, ch->read_buf, realsize); - - ch->read_count -= realsize; - ch->read_buf += realsize; - - return realsize; -} - static struct nbdkit_plugin plugin = { .name = "curl", .version = PACKAGE_VERSION, diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c new file mode 100644 index 000000000..8fd1dc147 --- /dev/null +++ b/plugins/curl/pool.c @@ -0,0 +1,482 @@ +/* nbdkit + * Copyright (C) 2014-2023 Red Hat Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Red Hat nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Curl handle pool. + * + * To get a libcurl handle, call get_handle(). When you hold the + * handle, it is yours exclusively to use. After you have finished + * with the handle, put it back into the pool by calling put_handle(). + */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <string.h> +#include <assert.h> +#include <pthread.h> + +#include <curl/curl.h> + +#include <nbdkit-plugin.h> + +#include "ascii-ctype.h" +#include "ascii-string.h" +#include "cleanup.h" + +#include "curldefs.h" + +/* Translate CURLcode to nbdkit_error. */ +#define display_curl_error(ch, r, fs, ...) \ + do { \ + nbdkit_error ((fs ": %s: %s"), ## __VA_ARGS__, \ + curl_easy_strerror ((r)), (ch)->errbuf); \ + } while (0) + +static struct curl_handle *allocate_handle (void); +static int debug_cb (CURL *handle, curl_infotype type, + const char *data, size_t size, void *); +static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); +static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); +static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); + +/* In the current implementation there is only one handle. This lock + * prevents it from being used multiple times. + */ +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + +/* The single curl handle. NULL means not yet allocated. */ +static struct curl_handle *the_ch; + +/* Close and free all handles in the pool. */ +void +free_all_handles (void) +{ + if (the_ch) { + curl_easy_cleanup (the_ch->c); + if (the_ch->headers_copy) + curl_slist_free_all (the_ch->headers_copy); + free (the_ch); + } +} + +/* Get a handle from the pool. + * + * It is owned exclusively by the caller until they call put_handle. + */ +struct curl_handle * +get_handle (void) +{ + int r; + + r = pthread_mutex_lock (&lock); + assert (r == 0); + if (!the_ch) { + the_ch = allocate_handle (); + if (!the_ch) { + pthread_mutex_unlock (&lock); + return NULL; + } + } + return the_ch; +} + +/* Return the handle to the pool. */ +void +put_handle (struct curl_handle *ch) +{ + pthread_mutex_unlock (&lock); +} + +/* Allocate and initialize a new libcurl handle. */ +static struct curl_handle * +allocate_handle (void) +{ + struct curl_handle *ch; + CURLcode r; +#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + curl_off_t o; +#else + double d; +#endif + + ch = calloc (1, sizeof *ch); + if (ch == NULL) { + nbdkit_error ("calloc: %m"); + free (ch); + return NULL; + } + + ch->c = curl_easy_init (); + if (ch->c == NULL) { + nbdkit_error ("curl_easy_init: failed: %m"); + goto err; + } + + if (curl_debug_verbose) { + /* NB: Constants must be explicitly long because the parameter is + * varargs. + */ + curl_easy_setopt (ch->c, CURLOPT_VERBOSE, 1L); + curl_easy_setopt (ch->c, CURLOPT_DEBUGFUNCTION, debug_cb); + } + + curl_easy_setopt (ch->c, CURLOPT_ERRORBUFFER, ch->errbuf); + + r = CURLE_OK; + if (unix_socket_path) { +#if HAVE_CURLOPT_UNIX_SOCKET_PATH + r = curl_easy_setopt (ch->c, CURLOPT_UNIX_SOCKET_PATH, unix_socket_path); +#else + r = CURLE_UNKNOWN_OPTION; +#endif + } + if (r != CURLE_OK) { + display_curl_error (ch, r, "curl_easy_setopt: CURLOPT_UNIX_SOCKET_PATH"); + goto err; + } + + /* Set the URL. */ + r = curl_easy_setopt (ch->c, CURLOPT_URL, url); + if (r != CURLE_OK) { + display_curl_error (ch, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); + goto err; + } + + /* Various options we always set. + * + * NB: Both here and below constants must be explicitly long because + * the parameter is varargs. + */ + curl_easy_setopt (ch->c, CURLOPT_AUTOREFERER, 1L); + if (followlocation) + curl_easy_setopt (ch->c, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt (ch->c, CURLOPT_FAILONERROR, 1L); + + /* Options. */ + if (cainfo) { + if (strlen (cainfo) == 0) + curl_easy_setopt (ch->c, CURLOPT_CAINFO, NULL); + else + curl_easy_setopt (ch->c, CURLOPT_CAINFO, cainfo); + } + if (capath) + curl_easy_setopt (ch->c, CURLOPT_CAPATH, capath); + if (cookie) + curl_easy_setopt (ch->c, CURLOPT_COOKIE, cookie); + if (cookiefile) + curl_easy_setopt (ch->c, CURLOPT_COOKIEFILE, cookiefile); + if (cookiejar) + curl_easy_setopt (ch->c, CURLOPT_COOKIEJAR, cookiejar); + if (headers) + curl_easy_setopt (ch->c, CURLOPT_HTTPHEADER, headers); + if (password) + curl_easy_setopt (ch->c, CURLOPT_PASSWORD, password); +#ifndef HAVE_CURLOPT_PROTOCOLS_STR + if (protocols != CURLPROTO_ALL) { + curl_easy_setopt (ch->c, CURLOPT_PROTOCOLS, protocols); + curl_easy_setopt (ch->c, CURLOPT_REDIR_PROTOCOLS, protocols); + } +#else /* HAVE_CURLOPT_PROTOCOLS_STR (new in 7.85.0) */ + if (protocols) { + curl_easy_setopt (ch->c, CURLOPT_PROTOCOLS_STR, protocols); + curl_easy_setopt (ch->c, CURLOPT_REDIR_PROTOCOLS_STR, protocols); + } +#endif /* HAVE_CURLOPT_PROTOCOLS_STR */ + if (proxy) + curl_easy_setopt (ch->c, CURLOPT_PROXY, proxy); + if (proxy_password) + curl_easy_setopt (ch->c, CURLOPT_PROXYPASSWORD, proxy_password); + if (proxy_user) + curl_easy_setopt (ch->c, CURLOPT_PROXYUSERNAME, proxy_user); + if (!sslverify) { + curl_easy_setopt (ch->c, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt (ch->c, CURLOPT_SSL_VERIFYHOST, 0L); + } + if (ssl_version) { + if (strcmp (ssl_version, "tlsv1") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1); + else if (strcmp (ssl_version, "sslv2") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv2); + else if (strcmp (ssl_version, "sslv3") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_SSLv3); + else if (strcmp (ssl_version, "tlsv1.0") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_0); + else if (strcmp (ssl_version, "tlsv1.1") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_1); + else if (strcmp (ssl_version, "tlsv1.2") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); + else if (strcmp (ssl_version, "tlsv1.3") == 0) + curl_easy_setopt (ch->c, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_3); + else { + display_curl_error (ch, r, "curl_easy_setopt: CURLOPT_SSLVERSION [%s]", + ssl_version); + goto err; + } + + } + if (ssl_cipher_list) + curl_easy_setopt (ch->c, CURLOPT_SSL_CIPHER_LIST, ssl_cipher_list); + if (tls13_ciphers) { +#if (LIBCURL_VERSION_MAJOR > 7) || \ + (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 61) + curl_easy_setopt (ch->c, CURLOPT_TLS13_CIPHERS, tls13_ciphers); +#else + /* This is not available before curl-7.61 */ + nbdkit_error ("tls13-ciphers is not supported in this build of " + "nbdkit-curl-plugin"); + goto err; +#endif + } + if (tcp_keepalive) + curl_easy_setopt (ch->c, CURLOPT_TCP_KEEPALIVE, 1L); + if (!tcp_nodelay) + curl_easy_setopt (ch->c, CURLOPT_TCP_NODELAY, 0L); + if (timeout > 0) + /* NB: The cast is required here because the parameter is varargs + * treated as long, and not type safe. + */ + curl_easy_setopt (ch->c, CURLOPT_TIMEOUT, (long) timeout); + if (user) + curl_easy_setopt (ch->c, CURLOPT_USERNAME, user); + if (user_agent) + curl_easy_setopt (ch->c, CURLOPT_USERAGENT, user_agent); + + /* Get the file size and also whether the remote HTTP server + * supports byte ranges. + * + * We must run the scripts if necessary and set headers in the + * handle. + */ + if (do_scripts (ch) == -1) goto err; + ch->accept_range = false; + curl_easy_setopt (ch->c, CURLOPT_NOBODY, 1L); /* No Body, not nobody! */ + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); + r = curl_easy_perform (ch->c); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "problem doing HEAD request to fetch size of URL [%s]", + url); + goto err; + } + +#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "could not get length of remote file [%s]", url); + goto err; + } + + if (o == -1) { + nbdkit_error ("could not get length of remote file [%s], " + "is the URL correct?", url); + goto err; + } + + ch->exportsize = o; +#else + r = curl_easy_getinfo (ch->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); + if (r != CURLE_OK) { + display_curl_error (ch, r, + "could not get length of remote file [%s]", url); + goto err; + } + + if (d == -1) { + nbdkit_error ("could not get length of remote file [%s], " + "is the URL correct?", url); + goto err; + } + + ch->exportsize = d; +#endif + nbdkit_debug ("content length: %" PRIi64, ch->exportsize); + + if (ascii_strncasecmp (url, "http://", strlen ("http://")) == 0 || + ascii_strncasecmp (url, "https://", strlen ("https://")) == 0) { + if (!ch->accept_range) { + nbdkit_error ("server does not support 'range' (byte range) requests"); + goto err; + } + + nbdkit_debug ("accept range supported (for HTTP/HTTPS)"); + } + + /* Get set up for reading and writing. */ + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, NULL); + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, NULL); + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); + /* These are only used if !readonly but we always register them. */ + curl_easy_setopt (ch->c, CURLOPT_READFUNCTION, read_cb); + curl_easy_setopt (ch->c, CURLOPT_READDATA, ch); + + return ch; + + err: + if (ch->c) + curl_easy_cleanup (ch->c); + free (ch); + return NULL; +} + +/* When using CURLOPT_VERBOSE, this callback is used to redirect + * messages to nbdkit_debug (instead of stderr). + */ +static int +debug_cb (CURL *handle, curl_infotype type, + const char *data, size_t size, void *opaque) +{ + size_t origsize = size; + CLEANUP_FREE char *str; + + /* The data parameter passed is NOT \0-terminated, but also it may + * have \n or \r\n line endings. The only sane way to deal with + * this is to copy the string. (The data strings may also be + * multi-line, but we don't deal with that here). + */ + str = malloc (size + 1); + if (str == NULL) + goto out; + memcpy (str, data, size); + str[size] = '\0'; + + while (size > 0 && (str[size-1] == '\n' || str[size-1] == '\r')) { + str[size-1] = '\0'; + size--; + } + + switch (type) { + case CURLINFO_TEXT: + nbdkit_debug ("%s", str); + break; + case CURLINFO_HEADER_IN: + nbdkit_debug ("S: %s", str); + break; + case CURLINFO_HEADER_OUT: + nbdkit_debug ("C: %s", str); + break; + default: + /* Assume everything else is binary data that we cannot print. */ + nbdkit_debug ("<data with size=%zu>", origsize); + } + + out: + return 0; +} + +static size_t +header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t realsize = size * nmemb; + const char *header = ptr; + const char *end = header + realsize; + const char *accept_ranges = "accept-ranges:"; + const char *bytes = "bytes"; + + if (realsize >= strlen (accept_ranges) && + ascii_strncasecmp (header, accept_ranges, strlen (accept_ranges)) == 0) { + const char *p = strchr (header, ':') + 1; + + /* Skip whitespace between the header name and value. */ + while (p < end && *p && ascii_isspace (*p)) + p++; + + if (end - p >= strlen (bytes) + && strncmp (p, bytes, strlen (bytes)) == 0) { + /* Check that there is nothing but whitespace after the value. */ + p += strlen (bytes); + while (p < end && *p && ascii_isspace (*p)) + p++; + + if (p == end || !*p) + ch->accept_range = true; + } + } + + return realsize; +} + +/* NB: The terminology used by libcurl is confusing! + * + * WRITEFUNCTION / write_cb is used when reading from the remote server + * READFUNCTION / read_cb is used when writing to the remote server. + * + * We use the same terminology as libcurl here. + */ + +static size_t +write_cb (char *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t orig_realsize = size * nmemb; + size_t realsize = orig_realsize; + + assert (ch->write_buf); + + /* Don't read more than the requested amount of data, even if the + * server or libcurl sends more. + */ + if (realsize > ch->write_count) + realsize = ch->write_count; + + memcpy (ch->write_buf, ptr, realsize); + + ch->write_count -= realsize; + ch->write_buf += realsize; + + return orig_realsize; +} + +static size_t +read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) +{ + struct curl_handle *ch = opaque; + size_t realsize = size * nmemb; + + assert (ch->read_buf); + if (realsize > ch->read_count) + realsize = ch->read_count; + + memcpy (ptr, ch->read_buf, realsize); + + ch->read_count -= realsize; + ch->read_buf += realsize; + + return realsize; +} diff --git a/tests/test-retry-request-mirror.c b/tests/test-retry-request-mirror.c index b1eb8af02..e7157184f 100644 --- a/tests/test-retry-request-mirror.c +++ b/tests/test-retry-request-mirror.c @@ -135,15 +135,6 @@ main (int argc, char *argv[]) } nbd_close (nbd); - - /* Reconnection in the next iteration of the loop will read from - * the mirror (because the curl plugin always makes a HEAD request - * in curl_open to read the size) which will flip the state, so we - * have to do it here. - */ - state++; - if (state == 3) - state = 1; } exit (EXIT_SUCCESS); -- 2.39.0
Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 5/6] curl: Use the parallel thread model
After previous changes, it is now safe to use the parallel thread model in this plugin. The locking in pool.c protects a single curl handle from being used from multiple threads. An advantage of this is we can now combine the curl plugin with filters such as readahead and scan. --- plugins/curl/curl.c | 2 +- plugins/curl/scripts.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index 105de29c6..c1d9df746 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -457,7 +457,7 @@ curl_close (void *handle) free (h); } -#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS +#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL /* Calls get_handle() ... put_handle() to get a handle for the length * of the current scope. diff --git a/plugins/curl/scripts.c b/plugins/curl/scripts.c index 9b674fc83..786ff7503 100644 --- a/plugins/curl/scripts.c +++ b/plugins/curl/scripts.c @@ -86,8 +86,8 @@ static void error_from_tmpfile (const char *what, const char *tmpfile); /* This is called from any thread just before we make a curl request. * - * Because the thread model is NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS - * we can be assured of exclusive access to handle here. + * Because the curl handle must be obtained through get_handle() we + * can be assured of exclusive access here. */ int do_scripts (struct curl_handle *ch) -- 2.39.0
Richard W.M. Jones
2023-Feb-04 12:34 UTC
[Libguestfs] [PATCH nbdkit 6/6] curl: Complete implementation of the curl handle pool
This commit implements the curl handle pool. By default it can grow to up to 4 curl handles, shared between all NBD connections. You can change this using the new connections=N parameter. --- plugins/curl/nbdkit-curl-plugin.pod | 7 +++ plugins/curl/curldefs.h | 2 + plugins/curl/curl.c | 11 ++++ plugins/curl/pool.c | 84 ++++++++++++++++++++++------- 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod index a396de027..f4cd07068 100644 --- a/plugins/curl/nbdkit-curl-plugin.pod +++ b/plugins/curl/nbdkit-curl-plugin.pod @@ -52,6 +52,13 @@ that libcurl is compiled with. Set CA certificates directory location for libcurl. See L<CURLOPT_CAPATH(3)> for more information. +=item B<connections=>N + +Open up to C<N> curl connections to the web server. The default is 4. +Curl connections are shared between all NBD clients, so you may wish +to increase this if you expect many simultaneous clients (or a single +client using many multi-conn connections). + =item B<cookie=>COOKIE =item B<cookie=+>FILENAME diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h index fabd1e537..c498b09e2 100644 --- a/plugins/curl/curldefs.h +++ b/plugins/curl/curldefs.h @@ -50,6 +50,7 @@ extern const char *url; extern const char *cainfo; extern const char *capath; +extern unsigned connections; extern char *cookie; extern const char *cookiefile; extern const char *cookiejar; @@ -89,6 +90,7 @@ struct handle { /* The libcurl handle and some associated fields and buffers. */ struct curl_handle { + bool in_use; /* True if the handle is in use by a thread. */ CURL *c; bool accept_range; int64_t exportsize; diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index c1d9df746..e7c0e18da 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -57,6 +57,7 @@ const char *url = NULL; /* required */ const char *cainfo = NULL; const char *capath = NULL; +unsigned connections = 4; char *cookie = NULL; const char *cookiefile = NULL; const char *cookiejar = NULL; @@ -206,6 +207,16 @@ curl_config (const char *key, const char *value) capath = value; } + else if (strcmp (key, "connections") == 0) { + if (nbdkit_parse_unsigned ("connections", value, + &connections) == -1) + return -1; + if (connections == 0) { + nbdkit_error ("connections parameter must not be 0"); + return -1; + } + } + else if (strcmp (key, "cookie") == 0) { free (cookie); if (nbdkit_read_password (value, &cookie) == -1) diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c index 8fd1dc147..c7d3e2a5f 100644 --- a/plugins/curl/pool.c +++ b/plugins/curl/pool.c @@ -55,6 +55,7 @@ #include "ascii-ctype.h" #include "ascii-string.h" #include "cleanup.h" +#include "vector.h" #include "curldefs.h" @@ -66,30 +67,40 @@ } while (0) static struct curl_handle *allocate_handle (void); +static void free_handle (struct curl_handle *); static int debug_cb (CURL *handle, curl_infotype type, const char *data, size_t size, void *); static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); -/* In the current implementation there is only one handle. This lock - * prevents it from being used multiple times. - */ +/* This lock protects access to the curl_handles vector below. */ static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; -/* The single curl handle. NULL means not yet allocated. */ -static struct curl_handle *the_ch; +/* List of curl handles. This is allocated dynamically as more + * handles are requested. Currently it does not shrink. It may grow + * up to 'connections' in length. + */ +DEFINE_VECTOR_TYPE(curl_handle_list, struct curl_handle *) +static curl_handle_list curl_handles = empty_vector; + +/* The condition is used when the curl handles vector is full and + * we're waiting for a thread to put_handle. + */ +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; /* Close and free all handles in the pool. */ void free_all_handles (void) { - if (the_ch) { - curl_easy_cleanup (the_ch->c); - if (the_ch->headers_copy) - curl_slist_free_all (the_ch->headers_copy); - free (the_ch); - } + size_t i; + + nbdkit_debug ("free_all_handles: number of curl handles allocated: %zu", + curl_handles.len); + + for (i = 0; i < curl_handles.len; ++i) + free_handle (curl_handles.ptr[i]); + curl_handle_list_reset (&curl_handles); } /* Get a handle from the pool. @@ -99,25 +110,49 @@ free_all_handles (void) struct curl_handle * get_handle (void) { - int r; + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); + size_t i; + struct curl_handle *ch; - r = pthread_mutex_lock (&lock); - assert (r == 0); - if (!the_ch) { - the_ch = allocate_handle (); - if (!the_ch) { - pthread_mutex_unlock (&lock); + again: + /* Look for a handle which is not in_use. */ + for (i = 0; i < curl_handles.len; ++i) { + if (! curl_handles.ptr[i]->in_use) { + curl_handles.ptr[i]->in_use = true; + return curl_handles.ptr[i]; + } + } + + /* If more connections are allowed, then allocate a new handle. */ + if (curl_handles.len < connections) { + ch = allocate_handle (); + if (ch == NULL) + return NULL; + if (curl_handle_list_append (&curl_handles, ch) == -1) { + free_handle (ch); return NULL; } + ch->in_use = true; + return ch; } - return the_ch; + + /* Otherwise we have run out of connections so we must wait until + * another thread calls put_handle. + */ + pthread_cond_wait (&cond, &lock); + goto again; } /* Return the handle to the pool. */ void put_handle (struct curl_handle *ch) { - pthread_mutex_unlock (&lock); + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&lock); + + ch->in_use = false; + + /* Signal in case any threads are waiting. */ + pthread_cond_signal (&cond); } /* Allocate and initialize a new libcurl handle. */ @@ -480,3 +515,12 @@ read_cb (void *ptr, size_t size, size_t nmemb, void *opaque) return realsize; } + +static void +free_handle (struct curl_handle *ch) +{ + curl_easy_cleanup (ch->c); + if (ch->headers_copy) + curl_slist_free_all (ch->headers_copy); + free (ch); +} -- 2.39.0
Richard W.M. Jones
2023-Feb-05 16:35 UTC
[Libguestfs] [PATCH nbdkit 0/6] curl: Use a curl handle pool
On Sat, Feb 04, 2023 at 12:34:52PM +0000, Richard W.M. Jones wrote:> Anyway, this all seems to work, but it actually reduces performance :-( > > In particular this simple test slows down quite substantially: > > time ./nbdkit -r -U - curl file:/var/tmp/fedora-36.img --run 'nbdcopy --no-extents -p "$uri" null:' > > (where /var/tmp/fedora-36.img is a 10G file).A bit more on this ... The slowdown is most easily observable if you apply this patch series, test it (see command above), and then change just: plugin/curl/curl.c: -#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL +#define THREAD_MODEL NBDKIT_THREAD_MODEL_SERIALIZE_REQUESTS Serialising requests dramatically, repeatably improves the performance! Here are flame graphs for the two cases: http://oirase.annexia.org/tmp/nbdkit-parallel.svg http://oirase.annexia.org/tmp/nbdkit-serialize-requests.svg These are across all cores on a 12 core / 24 thread machine. nbdkit is somehow able to consume more total machine time in the serialize requests case (67.75%) than in the parallel case (37.75%). nbdcopy is taking about the same amount of time in both cases. In the parallel case, the time spent in do_idle in the kernel dramatically increases. My working theory is this is something to do with starvation of the NBD multi-conn connections: We now have multi-conn enabled, so nbdcopy will make 4 connections to nbdkit. nbdcopy also aggressively keeps multiple requests in flight on each connection (64 at a time). In the serialize_requests case, each NBD connection will only handle a single request at a time. These are shared across the 4 available libcurl handles. In the parallel requests case, it is highly likely that the first 4 requests on the 1st NBD connection will grab the 4 available libcurl handles. The replies will then be sent back over the single NBD connection. Then the next 4 requests from one of the NBD connections will repeat the same thing. Basically even though multi-conn is possible, I expect that only one NBD connection is being fully utilised most of the time (or anyway full use is not made of all 4 NBD connections at the same time). To maximize throughput we want to send replies over all NBD connections simultaneously, and serialize_requests (indirectly and accidentally) achieves that. I'm still adding instrumentation to see if the theory above is right, plus I have no idea how to fix this. Rich. -- Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones Read my programming and virtualization blog: http://rwmj.wordpress.com libguestfs lets you edit virtual machines. Supports shell scripting, bindings from many languages. http://libguestfs.org
Laszlo Ersek
2023-Feb-06 16:23 UTC
[Libguestfs] [PATCH nbdkit 0/6] curl: Use a curl handle pool
On 2/4/23 13:34, Richard W.M. Jones wrote:> This experimental series changes the way that the curl plugin deals > with libcurl handles. It also changes the thread model of the plugin > from SERIALIZE_REQUESTS to PARALLEL. > > Currently one NBD connection opens one libcurl handle. This also > implies one TCP connection to the web server. If you want to open > multiple libcurl handles (and multiple TCP connections), the client > must open multiple NBD connections, eg. using multi-conn. > > After this series, there is a pool of libcurl handles shared across > all NBD connections. The pool defaults to 4 handles, but this can be > changed using the connections=N parameter. > > Previously the plugin relied on nbdkit SERIALIZE_REQUESTS to ensure > that a curl handle could not be used from multiple threads at the same > time (https://curl.se/libcurl/c/threadsafe.html). After this change > it is possible to use the PARALLEL thread model. This change is quite > valuable because it means we can use filters like readahead and scan. > > Anyway, this all seems to work, but it actually reduces performance :-( > > In particular this simple test slows down quite substantially: > > time ./nbdkit -r -U - curl file:/var/tmp/fedora-36.img --run 'nbdcopy --no-extents -p "$uri" null:' > > (where /var/tmp/fedora-36.img is a 10G file). > > I've been looking at flamegraphs all morning and I can't really see > what the problem is (except that lots more time is spent with libcurl > calling sigaction?!?) > > I'm wondering if it might be a locality issue, since curl handles are > now being scattered randomly across threads. (It might mean in the > file: case that Linux kernel readahead is ineffective). I can't > easily see a way to change the implementation to encourage handles to > be reused by the same thread.I believe the result is expected with a local "file:", and that it's precisely due to the reason you name. A good test case could be a long-distance http(s) download, or a download over a somewhat noisy (but not necessarily congested) WiFi link. IOW, scenarios where a single TCP connection doesn't perform supremely: - In the former case (long distance), because the bandwidth may indeed be limited, shared with many other TCP streams that don't terminate on your host, and TCP "plays nice" with oters. By having multiple connections, you might carve out a larger proportion of the bandwith. (Unless traffic shaping rules "up-stream" thwarted that.) And here the assumption is that the total bandwidth is negligible in comparison to what the disk on the remote end can sustain; IOW the same locality issue will not be hit on the remote server. - In the latter case (WiFi), because TCP mistakes packet loss for congestion, and slows down unjustifiedly, even if the lossage is extremely short-lived/transient. By having multiple streams, some streams could "bridge over" the "congestion" perceived by another stream. The second case could be possible to emulate locally, with the "tc" or the "iptables" utility: https://stackoverflow.com/questions/614795/simulate-delayed-and-dropped-packets-on-linux Laszlo