thr3ads.net - Linux Virtualization - [PATCH v6 11/11] vhost: allow userspace to create workers [Apr 2023]

If this information is useful, please help other people find it:
Share via:

Jason Wang

2023-Apr-04 08:00 UTC

[PATCH v6 11/11] vhost: allow userspace to create workers

On Tue, Mar 28, 2023 at 10:17?AM Mike Christie
<michael.christie at oracle.com> wrote:>
> For vhost-scsi with 3 vqs and a workload like that tries to use those vqs
> like:
>
> fio --filename=/dev/sdb  --direct=1 --rw=randrw --bs=4k \
> --ioengine=libaio --iodepth=128  --numjobs=3
>
> the single vhost worker thread will become a bottlneck and we are stuck
> at around 500K IOPs no matter how many jobs, virtqueues, and CPUs are
> used.
>
> To better utilize virtqueues and available CPUs, this patch allows
> userspace to create workers and bind them to vqs. You can have N workers
> per dev and also share N workers with M vqs.
>
> With the patches and doing a worker per vq, we can scale to at least
> 16 vCPUs/vqs (that's my system limit) with the same command fio command
> above with numjobs=16:
>
> fio --filename=/dev/sdb  --direct=1 --rw=randrw --bs=4k \
> --ioengine=libaio --iodepth=64  --numjobs=16
>
> which gives around 2326K IOPs.
>
> Note that for testing I dropped depth to 64 above because the vhost/virt
> layer supports only 1024 total commands per device. And the only tuning I
> did was set LIO's emulate_pr to 0 to avoid LIO's PR lock in the
main IO
> path which becomes an issue at around 12 jobs/virtqueues.
>
> Signed-off-by: Mike Christie <michael.christie at oracle.com>
> ---
>  drivers/vhost/vhost.c            | 177 ++++++++++++++++++++++++++++---
>  drivers/vhost/vhost.h            |   4 +-
>  include/uapi/linux/vhost.h       |  22 ++++
>  include/uapi/linux/vhost_types.h |  15 +++
>  4 files changed, 204 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 1fa5e9a49092..e40699e83c6d 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -271,7 +271,11 @@ EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
>
>  void vhost_dev_flush(struct vhost_dev *dev)
>  {
> -       vhost_work_flush_on(dev->worker);
> +       struct vhost_worker *worker;
> +       unsigned long i;
> +
> +       xa_for_each(&dev->worker_xa, i, worker)
> +               vhost_work_flush_on(worker);
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_flush);
>
> @@ -489,7 +493,6 @@ void vhost_dev_init(struct vhost_dev *dev,
>         dev->umem = NULL;
>         dev->iotlb = NULL;
>         dev->mm = NULL;
> -       dev->worker = NULL;
>         dev->iov_limit = iov_limit;
>         dev->weight = weight;
>         dev->byte_weight = byte_weight;
> @@ -499,7 +502,7 @@ void vhost_dev_init(struct vhost_dev *dev,
>         INIT_LIST_HEAD(&dev->read_list);
>         INIT_LIST_HEAD(&dev->pending_list);
>         spin_lock_init(&dev->iotlb_lock);
> -
> +       xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC);
>
>         for (i = 0; i < dev->nvqs; ++i) {
>                 vq = dev->vqs[i];
> @@ -562,32 +565,67 @@ static void vhost_detach_mm(struct vhost_dev *dev)
>         dev->mm = NULL;
>  }
>
> -static void vhost_worker_free(struct vhost_dev *dev)
> +static void vhost_worker_put(struct vhost_dev *dev, struct vhost_worker
*worker)
>  {
> -       struct vhost_worker *worker = dev->worker;
> -
>         if (!worker)
>                 return;
>
> -       dev->worker = NULL;
> +       if (!refcount_dec_and_test(&worker->refcount))
> +               return;
> +
>         WARN_ON(!llist_empty(&worker->work_list));
>         vhost_task_stop(worker->vtsk);
>         kfree(worker);
>  }
>
> +static void vhost_vq_detach_worker(struct vhost_virtqueue *vq)
> +{
> +       if (vq->worker)
What happens to the pending work that queues for the old worker?
> +               vhost_worker_put(vq->dev, vq->worker);
> +       vq->worker = NULL;
> +}
> +
> +static void vhost_workers_free(struct vhost_dev *dev)
> +{
> +       struct vhost_worker *worker;
> +       unsigned long i;
> +
> +       if (!dev->use_worker)
> +               return;
> +
> +       for (i = 0; i < dev->nvqs; i++)
> +               vhost_vq_detach_worker(dev->vqs[i]);
> +       /*
> +        * Drop the refcount taken during allocation, and handle the
default
> +        * worker and the cases where userspace might have crashed or was
lazy
> +        * and did a VHOST_NEW_WORKER but not a VHOST_FREE_WORKER.
> +        */
> +       xa_for_each(&dev->worker_xa, i, worker) {
> +               xa_erase(&dev->worker_xa, worker->id);
> +               vhost_worker_put(dev, worker);
> +       }
> +       xa_destroy(&dev->worker_xa);
> +}
> +
>  static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>  {
>         struct vhost_worker *worker;
>         struct vhost_task *vtsk;
>         char name[TASK_COMM_LEN];
> +       int ret;
> +       u32 id;
>
>         worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
>         if (!worker)
>                 return NULL;
>
> -       dev->worker = worker;
>         worker->kcov_handle = kcov_common_handle();
>         init_llist_head(&worker->work_list);
> +       /*
> +        * We increase the refcount for the initial creation and then
> +        * later each time it's attached to a virtqueue.
> +        */
> +       refcount_set(&worker->refcount, 1);
>         snprintf(name, sizeof(name), "vhost-%d",
current->pid);
>
>         vtsk = vhost_task_create(vhost_worker, worker, name);
> @@ -596,14 +634,104 @@ static struct vhost_worker
*vhost_worker_create(struct vhost_dev *dev)
>
>         worker->vtsk = vtsk;
>         vhost_task_start(vtsk);
> +
> +       ret = xa_alloc(&dev->worker_xa, &id, worker,
xa_limit_32b, GFP_KERNEL);
> +       if (ret < 0)
> +               goto stop_worker;
> +       worker->id = id;
> +
>         return worker;
>
> +stop_worker:
> +       vhost_task_stop(vtsk);
>  free_worker:
>         kfree(worker);
> -       dev->worker = NULL;
>         return NULL;
>  }
>
> +/* Caller must have device and virtqueue mutex */
> +static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
> +                                    struct vhost_worker *worker)
> +{
> +       refcount_inc(&worker->refcount);
> +       vhost_vq_detach_worker(vq);())
> +       vq->worker = worker;
What happens if there's a pending flush in a specific device (e.g in
vhost_scsi_tmf_resp_work())?

Does this mean we need to hold vq mutex when doing the flush? (which
seems not the case of vhost_scsi_tmf_resp_work()).
> +}
> +
> +/* Caller must have device and virtqueue mutex */
> +static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
> +                                 struct vhost_vring_worker *info)
> +{
> +       unsigned long index = info->worker_id;
> +       struct vhost_dev *dev = vq->dev;
> +       struct vhost_worker *worker;
> +
> +       if (!dev->use_worker)
> +               return -EINVAL;
> +
> +       /*
> +        * We don't support setting a worker on an active vq to make
flushing
> +        * and removal simple.
> +        */
> +       if (vhost_vq_get_backend(vq))
> +               return -EBUSY;
This assumes the worker won't be started until the backend is set
which is not true.
> +
> +       worker = xa_find(&dev->worker_xa, &index, UINT_MAX,
XA_PRESENT);
> +       if (!worker || worker->id != info->worker_id)
> +               return -ENODEV;
> +
> +       __vhost_vq_attach_worker(vq, worker);
> +       return 0;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_new_worker(struct vhost_dev *dev,
> +                           struct vhost_worker_state *info)
> +{
> +       struct vhost_worker *worker;
> +
> +       if (!dev->use_worker)
> +               return -EINVAL;
> +
> +       worker = vhost_worker_create(dev);
> +       if (!worker)
> +               return -ENOMEM;
> +
> +       info->worker_id = worker->id;
> +       return 0;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_free_worker(struct vhost_dev *dev,
> +                            struct vhost_worker_state *info)
> +{
> +       unsigned long index = info->worker_id;
> +       struct vhost_worker *worker;
> +
> +       if (!dev->use_worker)
> +               return -EINVAL;
> +
> +       worker = xa_find(&dev->worker_xa, &index, UINT_MAX,
XA_PRESENT);
So we use int for worker_id which conflicts with UINT_MAX here?

struct vhost_worker_state {
        /*
         * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
         * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
         * to free.
         */
        int worker_id;
};

A side effect of using xa_index directly is that userspace can guess
the xa_index of the default worker and free it here, is this intended?
Should we hide the default worker from xa?
> +       if (!worker || worker->id != info->worker_id)
> +               return -ENODEV;
> +
> +       /*
> +        * We can free the worker if it's not attached to any
virtqueues.
> +        */
> +       if (refcount_read(&worker->refcount) != 1)
> +               return -EBUSY;
> +
> +       xa_erase(&dev->worker_xa, worker->id);
> +       /*
> +        * Make sure if there was a flush that saw the worker in the XA
that
> +        * it has completed.
> +        */
> +       vhost_work_flush_on(worker);
> +
> +       vhost_worker_put(dev, worker);
> +       return 0;
> +}
> +
>  /* Caller should have device mutex */
>  long vhost_dev_set_owner(struct vhost_dev *dev)
>  {
> @@ -624,7 +752,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>                         goto err_worker;
>
>                 for (i = 0; i < dev->nvqs; i++)
> -                       dev->vqs[i]->worker = worker;
> +                       __vhost_vq_attach_worker(dev->vqs[i], worker);
>         }
>
>         err = vhost_dev_alloc_iovecs(dev);
> @@ -633,7 +761,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>
>         return 0;
>  err_iovecs:
> -       vhost_worker_free(dev);
> +       vhost_workers_free(dev);
>  err_worker:
>         vhost_detach_mm(dev);
>  err_mm:
> @@ -726,7 +854,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>         dev->iotlb = NULL;
>         vhost_clear_msg(dev);
>         wake_up_interruptible_poll(&dev->wait, EPOLLIN |
EPOLLRDNORM);
> -       vhost_worker_free(dev);
> +       vhost_workers_free(dev);
>         vhost_detach_mm(dev);
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
> @@ -1616,6 +1744,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned
int ioctl, void __user *arg
>         struct eventfd_ctx *ctx = NULL;
>         u32 __user *idxp = argp;
>         struct vhost_virtqueue *vq;
> +       struct vhost_vring_worker w;
>         struct vhost_vring_state s;
>         struct vhost_vring_file f;
>         u32 idx;
> @@ -1723,7 +1852,16 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned
int ioctl, void __user *arg
>                 if (copy_to_user(argp, &s, sizeof(s)))
>                         r = -EFAULT;
>                 break;
> -       default:
> +       case VHOST_ATTACH_VRING_WORKER:
> +               if (copy_from_user(&w, argp, sizeof(w))) {
> +                       r = -EFAULT;
> +                       break;
> +               }
> +               r = vhost_vq_attach_worker(vq, &w);
> +               if (!r && copy_to_user(argp, &w, sizeof(w)))
> +                       r = -EFAULT;
> +               break;
It's a little odd that we have new and attach but only a free.

Thanks
> +default:
>                 r = -ENOIOCTLCMD;
>         }
>
> @@ -1776,6 +1914,7 @@ EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
>  /* Caller must have device mutex */
>  long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user
*argp)
>  {
> +       struct vhost_worker_state w;
>         struct eventfd_ctx *ctx;
>         u64 p;
>         long r;
> @@ -1836,6 +1975,18 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned
int ioctl, void __user *argp)
>                 if (ctx)
>                         eventfd_ctx_put(ctx);
>                 break;
> +       case VHOST_NEW_WORKER:
> +               r = vhost_new_worker(d, &w);
> +               if (!r && copy_to_user(argp, &w, sizeof(w)))
> +                       r = -EFAULT;
> +               break;
> +       case VHOST_FREE_WORKER:
> +               if (copy_from_user(&w, argp, sizeof(w))) {
> +                       r = -EFAULT;
> +                       break;
> +               }
> +               r = vhost_free_worker(d, &w);
> +               break;
>         default:
>                 r = -ENOIOCTLCMD;
>                 break;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 395707c680e5..a67ae8293c38 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -30,6 +30,8 @@ struct vhost_worker {
>         struct vhost_task       *vtsk;
>         struct llist_head       work_list;
>         u64                     kcov_handle;
> +       refcount_t              refcount;
> +       u32                     id;
>  };
>
>  /* Poll a file (eventfd or socket) */
> @@ -156,7 +158,6 @@ struct vhost_dev {
>         struct vhost_virtqueue **vqs;
>         int nvqs;
>         struct eventfd_ctx *log_ctx;
> -       struct vhost_worker *worker;
>         struct vhost_iotlb *umem;
>         struct vhost_iotlb *iotlb;
>         spinlock_t iotlb_lock;
> @@ -166,6 +167,7 @@ struct vhost_dev {
>         int iov_limit;
>         int weight;
>         int byte_weight;
> +       struct xarray worker_xa;
>         bool use_worker;
>         int (*msg_handler)(struct vhost_dev *dev, u32 asid,
>                            struct vhost_iotlb_msg *msg);
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index 92e1b700b51c..7329e7f349dd 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -45,6 +45,23 @@
>  #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
>  /* Specify an eventfd file descriptor to signal on log write. */
>  #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
> +/* By default, a device gets one vhost_worker that its virtqueues share.
This
> + * command allows the owner of the device to create an additional
vhost_worker
> + * for the device. It can later be bound to 1 or more of its virtqueues
using
> + * the VHOST_ATTACH_VRING_WORKER command.
> + *
> + * This must be called after VHOST_SET_OWNER and the caller must be the
owner
> + * of the device. The new thread will inherit caller's cgroups and
namespaces,
> + * and will share the caller's memory space. The new thread will also
be
> + * counted against the caller's RLIMIT_NPROC value.
> + */
> +#define VHOST_NEW_WORKER _IOW(VHOST_VIRTIO, 0x8, struct
vhost_worker_state)
> +/* Free a worker created with VHOST_NEW_WORKER if it's not attached to
any
> + * virtqueue. If userspace is not able to call this for workers its
created,
> + * the kernel will free all the device's workers when the device is
closed and
> + * the last reference to the device has been released.
> + */
> +#define VHOST_FREE_WORKER _IOR(VHOST_VIRTIO, 0x9, struct
vhost_worker_state)
>
>  /* Ring setup. */
>  /* Set number of descriptors in ring. This parameter can not
> @@ -70,6 +87,11 @@
>  #define VHOST_VRING_BIG_ENDIAN 1
>  #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct
vhost_vring_state)
>  #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct
vhost_vring_state)
> +/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the
device's
> + * virtqueues. This must be done before the virtqueue is active.
> + */
> +#define VHOST_ATTACH_VRING_WORKER _IOR(VHOST_VIRTIO, 0x15,             \
> +                                      struct vhost_vring_worker)
>
>  /* The following ioctls use eventfd file descriptors to signal and poll
>   * for events. */
> diff --git a/include/uapi/linux/vhost_types.h
b/include/uapi/linux/vhost_types.h
> index c5690a8992d8..ad0fe2e721be 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -47,6 +47,21 @@ struct vhost_vring_addr {
>         __u64 log_guest_addr;
>  };
>
> +struct vhost_worker_state {
> +       /*
> +        * For VHOST_NEW_WORKER the kernel will return the new vhost_worker
id.
> +        * For VHOST_FREE_WORKER this must be set to the id of the
vhost_worker
> +        * to free.
> +        */
> +       int worker_id;
> +};
> +
> +struct vhost_vring_worker {
> +       unsigned int index;
> +       /* The id of the vhost_worker returned from VHOST_NEW_WORKER */
> +       int worker_id;
> +};
> +
>  /* no alignment requirement */
>  struct vhost_iotlb_msg {
>         __u64 iova;
> --
> 2.25.1
>

Mike Christie

2023-Apr-04 23:08 UTC

head link

[PATCH v6 11/11] vhost: allow userspace to create workers

On 4/4/23 3:00 AM, Jason Wang wrote:>>
>> -static void vhost_worker_free(struct vhost_dev *dev)
>> +static void vhost_worker_put(struct vhost_dev *dev, struct
vhost_worker *worker)
>>  {
>> -       struct vhost_worker *worker = dev->worker;
>> -
>>         if (!worker)
>>                 return;
>>
>> -       dev->worker = NULL;
>> +       if (!refcount_dec_and_test(&worker->refcount))
>> +               return;
>> +
>>         WARN_ON(!llist_empty(&worker->work_list));
>>         vhost_task_stop(worker->vtsk);
>>         kfree(worker);
>>  }
>>
>> +static void vhost_vq_detach_worker(struct vhost_virtqueue *vq)
>> +{
>> +       if (vq->worker)
> 
> What happens to the pending work that queues for the old worker?
I didn't think there would be works queued at this time. I see your comment
below about my assumption about the backend being set being wrong. Will
discuss down there.

>>
>> +/* Caller must have device and virtqueue mutex */
>> +static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
>> +                                    struct vhost_worker *worker)
>> +{
>> +       refcount_inc(&worker->refcount);
>> +       vhost_vq_detach_worker(vq);())
>> +       vq->worker = worker;
> 
> What happens if there's a pending flush in a specific device (e.g in
> vhost_scsi_tmf_resp_work())?
We wouldn't hit that specific case where we are running the above code and
vhost_scsi_tmf_resp_work.

Either:

1. The backend is NULL and has never been set, and so we would never have
called vhost_scsi_tmf_resp_work, because it can only be called after the
backend is set.

2. If the backed has been set and vhost_scsi_tmf_resp_work has
run or is running, then we when we would not have called
__vhost_vq_attach_worker
from vhost_vq_attach_worker because it would see vhost_vq_get_backend
returning a non-NULL value.

If vhost_scsi later sets the backend to NULL, then vhost_scsi_clear_endpoint
will have made sure the flush has completed when the clear function returns.
It does that with the device mutex so when we run __vhost_vq_attach_worker
It will only see a vq/worker with no flushes in progress.

For the general case of can we be doing a vhost_dev_flush/vhost_work_flush_on
and __vhost_vq_attach_worker, then I thought we are ok as well because I
thought we have to currently have the device mutex when we flush so those
can't
race with ioctl calls to vhost_vq_attach_worker since we hold the dev mutex
during that ioctls. Or we call flush from the file_operations release function 
so the device is closed and can't race with ioctls.
> 
> Does this mean we need to hold vq mutex when doing the flush? (which
> seems not the case of vhost_scsi_tmf_resp_work()).
> 
>> +}
>> +
>> +/* Caller must have device and virtqueue mutex */
>> +static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
>> +                                 struct vhost_vring_worker *info)
>> +{
>> +       unsigned long index = info->worker_id;
>> +       struct vhost_dev *dev = vq->dev;
>> +       struct vhost_worker *worker;
>> +
>> +       if (!dev->use_worker)
>> +               return -EINVAL;
>> +
>> +       /*
>> +        * We don't support setting a worker on an active vq to
make flushing
>> +        * and removal simple.
>> +        */
>> +       if (vhost_vq_get_backend(vq))
>> +               return -EBUSY;
> 
> This assumes the worker won't be started until the backend is set
> which is not true.
I can see how we get flushes before setting the backend, but I thought we are
ok because we have the device mutex held.

What are the other possible cases? Is one case we could get a
VHOST_SET_VRING_KICK before setting the backend and so vhost_poll_start starts
to allow vhost_poll_queue before the backend is set? Is there any thing else?

I'll fix this issue.

>> +
>> +/* Caller must have device mutex */
>> +static int vhost_free_worker(struct vhost_dev *dev,
>> +                            struct vhost_worker_state *info)
>> +{
>> +       unsigned long index = info->worker_id;
>> +       struct vhost_worker *worker;
>> +
>> +       if (!dev->use_worker)
>> +               return -EINVAL;
>> +
>> +       worker = xa_find(&dev->worker_xa, &index, UINT_MAX,
XA_PRESENT);
> 
> So we use int for worker_id which conflicts with UINT_MAX here?
I switched from idr in the last versions to xa last second and added this
mistake.
Will fix.

> 
> struct vhost_worker_state {
>         /*
>          * For VHOST_NEW_WORKER the kernel will return the new vhost_worker
id.
>          * For VHOST_FREE_WORKER this must be set to the id of the
vhost_worker
>          * to free.
>          */
>         int worker_id;
> };
> 
> A side effect of using xa_index directly is that userspace can guess
> the xa_index of the default worker and free it here, is this intended?I don't need the functionality. It was only something that I didn't
think mattered much, because you can only free the worker if it's not in
use by any virtqueues, so I didn't add any special code to handle it.
The user would have had to do an ATTACH to the default worker and replace
it then it could free it, so it works like the other workers.
> Should we hide the default worker from xa?
I can change it if you are worried about future problems.

>> -       default:
>> +       case VHOST_ATTACH_VRING_WORKER:
>> +               if (copy_from_user(&w, argp, sizeof(w))) {
>> +                       r = -EFAULT;
>> +                       break;
>> +               }
>> +               r = vhost_vq_attach_worker(vq, &w);
>> +               if (!r && copy_to_user(argp, &w,
sizeof(w)))
>> +                       r = -EFAULT;
>> +               break;
> 
> It's a little odd that we have new and attach but only a free.
Do you mean we would also want a detach? I've been debating that.
I'm not sure what the user wanted though.

Would it leave the virtqueue with no worker? Or, does it pick the first
worker it finds? If there is no worker because we did the former or
because userspace detached all of them, then do we just drop work that
gets queued?

It seemed like a user would never want to drop work, so I made it so
you can only tell it to use new workers (attach which I guess is more
like a swap worker) so we always have a worker and I don't have to
worry about dropping work.

Linux Virtualization - Apr 2023 - [PATCH v6 11/11] vhost: allow userspace to create workers

[PATCH v6 11/11] vhost: allow userspace to create workers

[PATCH v6 11/11] vhost: allow userspace to create workers