Wei Wang
2017-Nov-13 10:34 UTC
[PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
Ping for comments, thanks. On 11/03/2017 04:13 PM, Wei Wang wrote:> Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_VQ feature indicates the > support of reporting hints of guest free pages to the host via > virtio-balloon. The host requests the guest to report the free pages by > sending commands via the virtio-balloon configuration registers. > > When the guest starts to report, the first element added to the free page > vq is a sequence id of the start reporting command. The id is given by > the host, and it indicates whether the following free pages correspond > to the command. For example, the host may stop the report and start again > with a new command id. The obsolete pages for the previous start command > can be detected by the id dismatching on the host. The id is added to the > vq using an output buffer, and the free pages are added to the vq using > input buffer. > > Here are some explainations about the added configuration registers: > - host2guest_cmd: a register used by the host to send commands to the > guest. > - guest2host_cmd: written by the guest to ACK to the host about the > commands that have been received. The host will clear the corresponding > bits on the host2guest_cmd register. The guest also uses this register > to send commands to the host (e.g. when finish free page reporting). > - free_page_cmd_id: the sequence id of the free page report command > given by the host. > > Signed-off-by: Wei Wang <wei.w.wang at intel.com> > Signed-off-by: Liang Li <liang.z.li at intel.com> > Cc: Michael S. Tsirkin <mst at redhat.com> > Cc: Michal Hocko <mhocko at kernel.org> > --- > drivers/virtio/virtio_balloon.c | 234 ++++++++++++++++++++++++++++++++---- > include/uapi/linux/virtio_balloon.h | 11 ++ > 2 files changed, 223 insertions(+), 22 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index b31fc25..4087f04 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -55,7 +55,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > + > + /* Balloon's own wq for cpu-intensive work items */ > + struct workqueue_struct *balloon_wq; > + /* The free page reporting work item submitted to the balloon wq */ > + struct work_struct report_free_page_work; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > @@ -65,6 +70,10 @@ struct virtio_balloon { > spinlock_t stop_update_lock; > bool stop_update; > > + /* Stop reporting free pages */ > + bool report_free_page_stop; > + uint32_t free_page_cmd_id; > + > /* Waiting for host to ack the pages we released. */ > wait_queue_head_t acked; > > @@ -191,6 +200,30 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, > kick_and_wait(vq, vb->acked); > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) > +{ > + int err = 0; > + unsigned int len; > + > + /* Detach all the used buffers from the vq */ > + while (virtqueue_get_buf(vq, &len)) > + ; > + > + /* > + * Since this is an optimization feature, losing a couple of free > + * pages to report isn't important. We simply resturn without adding > + * the page if the vq is full. > + */ > + if (vq->num_free) { > + err = add_one_sg(vq, addr, size); > + BUG_ON(err); > + } > + > + /* Batch till the vq is full */ > + if (!vq->num_free) > + virtqueue_kick(vq); > +} > + > /* > * Send balloon pages in sgs to host. The balloon pages are recorded in the > * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. > @@ -495,9 +528,8 @@ static void stats_handle_request(struct virtio_balloon *vb) > virtqueue_kick(vq); > } > > -static void virtballoon_changed(struct virtio_device *vdev) > +static void virtballoon_cmd_balloon_memory(struct virtio_balloon *vb) > { > - struct virtio_balloon *vb = vdev->priv; > unsigned long flags; > > spin_lock_irqsave(&vb->stop_update_lock, flags); > @@ -506,6 +538,50 @@ static void virtballoon_changed(struct virtio_device *vdev) > spin_unlock_irqrestore(&vb->stop_update_lock, flags); > } > > +static void virtballoon_cmd_report_free_page_start(struct virtio_balloon *vb) > +{ > + unsigned long flags; > + > + vb->report_free_page_stop = false; > + spin_lock_irqsave(&vb->stop_update_lock, flags); > + if (!vb->stop_update) > + queue_work(vb->balloon_wq, &vb->report_free_page_work); > + spin_unlock_irqrestore(&vb->stop_update_lock, flags); > +} > + > +static void virtballoon_changed(struct virtio_device *vdev) > +{ > + struct virtio_balloon *vb = vdev->priv; > + u32 host2guest_cmd, guest2host_cmd = 0; > + > + if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > + virtballoon_cmd_balloon_memory(vb); > + return; > + } > + > + virtio_cread(vb->vdev, struct virtio_balloon_config, host2guest_cmd, > + &host2guest_cmd); > + > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_BALLOON_MEMORY) { > + virtballoon_cmd_balloon_memory(vb); > + guest2host_cmd |= VIRTIO_BALLOON_CMD_BALLOON_MEMORY; > + } > + > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START) { > + virtballoon_cmd_report_free_page_start(vb); > + guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START; > + } > + > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP) { > + vb->report_free_page_stop = true; > + guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP; > + } > + > + /* Ack to the host about the commands that have been received */ > + virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd, > + &guest2host_cmd); > +} > + > static inline s64 towards_target(struct virtio_balloon *vb) > { > s64 target; > @@ -597,42 +673,147 @@ static void update_balloon_size_func(struct work_struct *work) > queue_work(system_freezable_wq, work); > } > > -static int init_vqs(struct virtio_balloon *vb) > +static bool virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > + unsigned long nr_pages) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; > - static const char * const names[] = { "inflate", "deflate", "stats" }; > - int err, nvqs; > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + void *addr = (void *)pfn_to_kaddr(pfn); > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + if (vb->report_free_page_stop) > + return false; > + > + send_free_page_sg(vb->free_page_vq, addr, len); > > + return true; > +} > + > +static void report_free_page_end(struct virtio_balloon *vb) > +{ > + u32 cmd = VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP; > /* > - * We expect two virtqueues: inflate and deflate, and > - * optionally stat. > + * The host may have already requested to stop the reporting before we > + * finish, so no need to notify the host in this case. > */ > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > + if (vb->report_free_page_stop) > + return; > + vb->report_free_page_stop = true; > + > + virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd, > + &cmd); > +} > + > +static void report_free_page_cmd_id(struct virtio_balloon *vb) > +{ > + struct scatterlist sg; > + int err; > + > + virtio_cread(vb->vdev, struct virtio_balloon_config, free_page_cmd_id, > + &vb->free_page_cmd_id); > + sg_init_one(&sg, &vb->free_page_cmd_id, sizeof(uint32_t)); > + err = virtqueue_add_outbuf(vb->free_page_vq, &sg, 1, > + &vb->free_page_cmd_id, GFP_KERNEL); > + BUG_ON(err); > +} > + > +static void report_free_page(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + report_free_page_cmd_id(vb); > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > + /* > + * The last few free page blocks that were added may not reach the > + * batch size, but need a kick to notify the device to handle them. > + */ > + virtqueue_kick(vb->free_page_vq); > + report_free_page_end(vb); > +} > + > +static int init_vqs(struct virtio_balloon *vb) > +{ > + struct virtqueue **vqs; > + vq_callback_t **callbacks; > + const char **names; > + struct scatterlist sg; > + int i, nvqs, err = -ENOMEM; > + > + /* Inflateq and deflateq are used unconditionally */ > + nvqs = 2; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > + nvqs++; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > + nvqs++; > + > + /* Allocate space for find_vqs parameters */ > + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); > + if (!vqs) > + goto err_vq; > + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); > + if (!callbacks) > + goto err_callback; > + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); > + if (!names) > + goto err_names; > + > + callbacks[0] = balloon_ack; > + names[0] = "inflate"; > + callbacks[1] = balloon_ack; > + names[1] = "deflate"; > + > + i = 2; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > + callbacks[i] = stats_request; > + names[i] = "stats"; > + i++; > + } > + > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > + callbacks[i] = NULL; > + names[i] = "free_page_vq"; > + } > + > + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, > + NULL, NULL); > if (err) > - return err; > + goto err_find; > > vb->inflate_vq = vqs[0]; > vb->deflate_vq = vqs[1]; > + i = 2; > if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > - struct scatterlist sg; > - unsigned int num_stats; > - vb->stats_vq = vqs[2]; > - > + vb->stats_vq = vqs[i++]; > /* > * Prime this virtqueue with one buffer so the hypervisor can > * use it to signal us later (it can't be broken yet!). > */ > - num_stats = update_balloon_stats(vb); > - > - sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); > + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); > if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) > - < 0) > - BUG(); > + < 0) { > + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", > + __func__); > + goto err_find; > + } > virtqueue_kick(vb->stats_vq); > } > + > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > + vb->free_page_vq = vqs[i]; > + > + kfree(names); > + kfree(callbacks); > + kfree(vqs); > return 0; > + > +err_find: > + kfree(names); > +err_names: > + kfree(callbacks); > +err_callback: > + kfree(vqs); > +err_vq: > + return err; > } > > #ifdef CONFIG_BALLOON_COMPACTION > @@ -761,6 +942,13 @@ static int virtballoon_probe(struct virtio_device *vdev) > if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_SG)) > xb_init(&vb->page_xb); > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > + vb->balloon_wq = alloc_workqueue("balloon-wq", > + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); > + INIT_WORK(&vb->report_free_page_work, report_free_page); > + vb->report_free_page_stop = true; > + } > + > vb->nb.notifier_call = virtballoon_oom_notify; > vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; > err = register_oom_notifier(&vb->nb); > @@ -825,6 +1013,7 @@ static void virtballoon_remove(struct virtio_device *vdev) > spin_unlock_irq(&vb->stop_update_lock); > cancel_work_sync(&vb->update_balloon_size_work); > cancel_work_sync(&vb->update_balloon_stats_work); > + cancel_work_sync(&vb->report_free_page_work); > > remove_common(vb); > #ifdef CONFIG_BALLOON_COMPACTION > @@ -878,6 +1067,7 @@ static unsigned int features[] = { > VIRTIO_BALLOON_F_STATS_VQ, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > VIRTIO_BALLOON_F_SG, > + VIRTIO_BALLOON_F_FREE_PAGE_VQ, > }; > > static struct virtio_driver virtio_balloon_driver = { > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > index 37780a7..b758484 100644 > --- a/include/uapi/linux/virtio_balloon.h > +++ b/include/uapi/linux/virtio_balloon.h > @@ -35,15 +35,26 @@ > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > #define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ > +#define VIRTIO_BALLOON_F_FREE_PAGE_VQ 4 /* VQ to report free pages */ > > /* Size of a PFN in the balloon interface. */ > #define VIRTIO_BALLOON_PFN_SHIFT 12 > > +#define VIRTIO_BALLOON_CMD_BALLOON_MEMORY (1 << 0) > +#define VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START (1 << 1) > +#define VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP (1 << 2) > + > struct virtio_balloon_config { > /* Number of pages host wants Guest to give up. */ > __u32 num_pages; > /* Number of pages we've actually got in balloon. */ > __u32 actual; > + /* Host-to-guest command, readonly by guest */ > + __u32 host2guest_cmd; > + /* Sequence id of the free_page report command, readonly by guest */ > + __u32 free_page_cmd_id; > + /* Guest-to-host command */ > + __u32 guest2host_cmd; > }; > > #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
Michael S. Tsirkin
2017-Nov-13 17:32 UTC
[PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
You should Cc Nitesh who is working on a related feature. On Mon, Nov 13, 2017 at 06:34:48PM +0800, Wei Wang wrote:> Ping for comments, thanks. > > On 11/03/2017 04:13 PM, Wei Wang wrote: > > Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_VQ feature indicates the > > support of reporting hints of guest free pages to the host via > > virtio-balloon. The host requests the guest to report the free pages by > > sending commands via the virtio-balloon configuration registers. > > > > When the guest starts to report, the first element added to the free page > > vq is a sequence id of the start reporting command. The id is given by > > the host, and it indicates whether the following free pages correspond > > to the command. For example, the host may stop the report and start again > > with a new command id. The obsolete pages for the previous start command > > can be detected by the id dismatching on the host. The id is added to the > > vq using an output buffer, and the free pages are added to the vq using > > input buffer. > > > > Here are some explainations about the added configuration registers: > > - host2guest_cmd: a register used by the host to send commands to the > > guest. > > - guest2host_cmd: written by the guest to ACK to the host about the > > commands that have been received. The host will clear the corresponding > > bits on the host2guest_cmd register. The guest also uses this register > > to send commands to the host (e.g. when finish free page reporting).I am not sure what is the role of guest2host_cmd. Reporting of the correct cmd id seems sufficient indication that guest received the start command. Not getting any more seems sufficient to detect stop.> > - free_page_cmd_id: the sequence id of the free page report command > > given by the host. > > > > Signed-off-by: Wei Wang <wei.w.wang at intel.com> > > Signed-off-by: Liang Li <liang.z.li at intel.com> > > Cc: Michael S. Tsirkin <mst at redhat.com> > > Cc: Michal Hocko <mhocko at kernel.org> > > --- > > drivers/virtio/virtio_balloon.c | 234 ++++++++++++++++++++++++++++++++---- > > include/uapi/linux/virtio_balloon.h | 11 ++ > > 2 files changed, 223 insertions(+), 22 deletions(-) > > > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > > index b31fc25..4087f04 100644 > > --- a/drivers/virtio/virtio_balloon.c > > +++ b/drivers/virtio/virtio_balloon.c > > @@ -55,7 +55,12 @@ static struct vfsmount *balloon_mnt; > > struct virtio_balloon { > > struct virtio_device *vdev; > > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > > + > > + /* Balloon's own wq for cpu-intensive work items */ > > + struct workqueue_struct *balloon_wq; > > + /* The free page reporting work item submitted to the balloon wq */ > > + struct work_struct report_free_page_work; > > /* The balloon servicing is delegated to a freezable workqueue. */ > > struct work_struct update_balloon_stats_work; > > @@ -65,6 +70,10 @@ struct virtio_balloon { > > spinlock_t stop_update_lock; > > bool stop_update; > > + /* Stop reporting free pages */ > > + bool report_free_page_stop;I would revert logic here: bool report_free_page;> > + uint32_t free_page_cmd_id; > > + > > /* Waiting for host to ack the pages we released. */ > > wait_queue_head_t acked; > > @@ -191,6 +200,30 @@ static void send_balloon_page_sg(struct virtio_balloon *vb, > > kick_and_wait(vq, vb->acked); > > } > > +static void send_free_page_sg(struct virtqueue *vq, void *addr, uint32_t size) > > +{ > > + int err = 0; > > + unsigned int len; > > + > > + /* Detach all the used buffers from the vq */ > > + while (virtqueue_get_buf(vq, &len)) > > + ; > > + > > + /* > > + * Since this is an optimization feature, losing a couple of free > > + * pages to report isn't important. We simply resturn without adding > > + * the page if the vq is full. > > + */ > > + if (vq->num_free) { > > + err = add_one_sg(vq, addr, size); > > + BUG_ON(err); > > + } > > + > > + /* Batch till the vq is full */ > > + if (!vq->num_free) > > + virtqueue_kick(vq); > > +} > > + > > /* > > * Send balloon pages in sgs to host. The balloon pages are recorded in the > > * page xbitmap. Each bit in the bitmap corresponds to a page of PAGE_SIZE. > > @@ -495,9 +528,8 @@ static void stats_handle_request(struct virtio_balloon *vb) > > virtqueue_kick(vq); > > } > > -static void virtballoon_changed(struct virtio_device *vdev) > > +static void virtballoon_cmd_balloon_memory(struct virtio_balloon *vb) > > { > > - struct virtio_balloon *vb = vdev->priv; > > unsigned long flags; > > spin_lock_irqsave(&vb->stop_update_lock, flags); > > @@ -506,6 +538,50 @@ static void virtballoon_changed(struct virtio_device *vdev) > > spin_unlock_irqrestore(&vb->stop_update_lock, flags); > > } > > +static void virtballoon_cmd_report_free_page_start(struct virtio_balloon *vb) > > +{ > > + unsigned long flags; > > + > > + vb->report_free_page_stop = false;this flag is used a lot outside any locks. Why is this safe? Please add some comments explaining access to this flag.> > + spin_lock_irqsave(&vb->stop_update_lock, flags); > > + if (!vb->stop_update) > > + queue_work(vb->balloon_wq, &vb->report_free_page_work); > > + spin_unlock_irqrestore(&vb->stop_update_lock, flags); > > +} > > + > > +static void virtballoon_changed(struct virtio_device *vdev) > > +{ > > + struct virtio_balloon *vb = vdev->priv; > > + u32 host2guest_cmd, guest2host_cmd = 0; > > + > > + if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > > + virtballoon_cmd_balloon_memory(vb); > > + return;This might be a handy feature: one can disable balloon without hot-unplug. But I would use a separate feature flag to control it.> > + } > > + > > + virtio_cread(vb->vdev, struct virtio_balloon_config, host2guest_cmd, > > + &host2guest_cmd); > > + > > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_BALLOON_MEMORY) { > > + virtballoon_cmd_balloon_memory(vb); > > + guest2host_cmd |= VIRTIO_BALLOON_CMD_BALLOON_MEMORY; > > + } > > + > > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START) { > > + virtballoon_cmd_report_free_page_start(vb); > > + guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START; > > + } > > + > > + if (host2guest_cmd & VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP) { > > + vb->report_free_page_stop = true; > > + guest2host_cmd |= VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP; > > + }I am not sure why free page has start+stop but e.g. balloon has a single bit. In fact I would really just use command id. When it changes, we know a new report is needed.> > + > > + /* Ack to the host about the commands that have been received */ > > + virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd, > > + &guest2host_cmd);So the same register is used to ack stop command and to signal end of report. This seems buggy.> > +} > > + > > static inline s64 towards_target(struct virtio_balloon *vb) > > { > > s64 target; > > @@ -597,42 +673,147 @@ static void update_balloon_size_func(struct work_struct *work) > > queue_work(system_freezable_wq, work); > > } > > -static int init_vqs(struct virtio_balloon *vb) > > +static bool virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > > + unsigned long nr_pages) > > { > > - struct virtqueue *vqs[3]; > > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; > > - static const char * const names[] = { "inflate", "deflate", "stats" }; > > - int err, nvqs; > > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > > + void *addr = (void *)pfn_to_kaddr(pfn);How do we know all free pages have a kaddr?> > + uint32_t len = nr_pages << PAGE_SHIFT; > > + > > + if (vb->report_free_page_stop) > > + return false; > > + > > + send_free_page_sg(vb->free_page_vq, addr, len); > > + return true; > > +} > > + > > +static void report_free_page_end(struct virtio_balloon *vb) > > +{ > > + u32 cmd = VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP; > > /* > > - * We expect two virtqueues: inflate and deflate, and > > - * optionally stat. > > + * The host may have already requested to stop the reporting before we > > + * finish, so no need to notify the host in this case. > > */ > > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; > > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > > + if (vb->report_free_page_stop) > > + return; > > + vb->report_free_page_stop = true; > > + > > + virtio_cwrite(vb->vdev, struct virtio_balloon_config, guest2host_cmd, > > + &cmd);Wouldn't it be easier to add a buffer in the queue?> > +} > > + > > +static void report_free_page_cmd_id(struct virtio_balloon *vb) > > +{ > > + struct scatterlist sg; > > + int err; > > + > > + virtio_cread(vb->vdev, struct virtio_balloon_config, free_page_cmd_id, > > + &vb->free_page_cmd_id); > > + sg_init_one(&sg, &vb->free_page_cmd_id, sizeof(uint32_t)); > > + err = virtqueue_add_outbuf(vb->free_page_vq, &sg, 1, > > + &vb->free_page_cmd_id, GFP_KERNEL); > > + BUG_ON(err); > > +} > > + > > +static void report_free_page(struct work_struct *work) > > +{ > > + struct virtio_balloon *vb; > > + > > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > > + report_free_page_cmd_id(vb); > > + walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > > + /* > > + * The last few free page blocks that were added may not reach the > > + * batch size, but need a kick to notify the device to handle them. > > + */ > > + virtqueue_kick(vb->free_page_vq); > > + report_free_page_end(vb); > > +} > > + > > +static int init_vqs(struct virtio_balloon *vb) > > +{ > > + struct virtqueue **vqs; > > + vq_callback_t **callbacks; > > + const char **names; > > + struct scatterlist sg; > > + int i, nvqs, err = -ENOMEM; > > + > > + /* Inflateq and deflateq are used unconditionally */ > > + nvqs = 2; > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) > > + nvqs++; > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > > + nvqs++; > > + > > + /* Allocate space for find_vqs parameters */ > > + vqs = kcalloc(nvqs, sizeof(*vqs), GFP_KERNEL); > > + if (!vqs) > > + goto err_vq; > > + callbacks = kmalloc_array(nvqs, sizeof(*callbacks), GFP_KERNEL); > > + if (!callbacks) > > + goto err_callback; > > + names = kmalloc_array(nvqs, sizeof(*names), GFP_KERNEL); > > + if (!names) > > + goto err_names; > > + > > + callbacks[0] = balloon_ack; > > + names[0] = "inflate"; > > + callbacks[1] = balloon_ack; > > + names[1] = "deflate"; > > + > > + i = 2; > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > > + callbacks[i] = stats_request; > > + names[i] = "stats"; > > + i++; > > + } > > + > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > > + callbacks[i] = NULL; > > + names[i] = "free_page_vq"; > > + } > > + > > + err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names, > > + NULL, NULL); > > if (err) > > - return err; > > + goto err_find; > > vb->inflate_vq = vqs[0]; > > vb->deflate_vq = vqs[1]; > > + i = 2; > > if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > > - struct scatterlist sg; > > - unsigned int num_stats; > > - vb->stats_vq = vqs[2]; > > - > > + vb->stats_vq = vqs[i++]; > > /* > > * Prime this virtqueue with one buffer so the hypervisor can > > * use it to signal us later (it can't be broken yet!). > > */ > > - num_stats = update_balloon_stats(vb); > > - > > - sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); > > + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); > > if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) > > - < 0) > > - BUG(); > > + < 0) { > > + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", > > + __func__); > > + goto err_find; > > + } > > virtqueue_kick(vb->stats_vq); > > } > > + > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) > > + vb->free_page_vq = vqs[i]; > > + > > + kfree(names); > > + kfree(callbacks); > > + kfree(vqs); > > return 0; > > + > > +err_find: > > + kfree(names); > > +err_names: > > + kfree(callbacks); > > +err_callback: > > + kfree(vqs); > > +err_vq: > > + return err; > > } > > #ifdef CONFIG_BALLOON_COMPACTION > > @@ -761,6 +942,13 @@ static int virtballoon_probe(struct virtio_device *vdev) > > if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_SG)) > > xb_init(&vb->page_xb); > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_VQ)) { > > + vb->balloon_wq = alloc_workqueue("balloon-wq", > > + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); > > + INIT_WORK(&vb->report_free_page_work, report_free_page); > > + vb->report_free_page_stop = true; > > + } > > + > > vb->nb.notifier_call = virtballoon_oom_notify; > > vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; > > err = register_oom_notifier(&vb->nb); > > @@ -825,6 +1013,7 @@ static void virtballoon_remove(struct virtio_device *vdev) > > spin_unlock_irq(&vb->stop_update_lock); > > cancel_work_sync(&vb->update_balloon_size_work); > > cancel_work_sync(&vb->update_balloon_stats_work); > > + cancel_work_sync(&vb->report_free_page_work); > > remove_common(vb); > > #ifdef CONFIG_BALLOON_COMPACTION > > @@ -878,6 +1067,7 @@ static unsigned int features[] = { > > VIRTIO_BALLOON_F_STATS_VQ, > > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > > VIRTIO_BALLOON_F_SG, > > + VIRTIO_BALLOON_F_FREE_PAGE_VQ, > > }; > > static struct virtio_driver virtio_balloon_driver = { > > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > > index 37780a7..b758484 100644 > > --- a/include/uapi/linux/virtio_balloon.h > > +++ b/include/uapi/linux/virtio_balloon.h > > @@ -35,15 +35,26 @@ > > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > > #define VIRTIO_BALLOON_F_SG 3 /* Use sg instead of PFN lists */ > > +#define VIRTIO_BALLOON_F_FREE_PAGE_VQ 4 /* VQ to report free pages */ > > /* Size of a PFN in the balloon interface. */ > > #define VIRTIO_BALLOON_PFN_SHIFT 12 > > +#define VIRTIO_BALLOON_CMD_BALLOON_MEMORY (1 << 0) > > +#define VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_START (1 << 1) > > +#define VIRTIO_BALLOON_CMD_REPORT_FREE_PAGE_STOP (1 << 2) > > + > > struct virtio_balloon_config { > > /* Number of pages host wants Guest to give up. */ > > __u32 num_pages; > > /* Number of pages we've actually got in balloon. */ > > __u32 actual; > > + /* Host-to-guest command, readonly by guest */ > > + __u32 host2guest_cmd; > > + /* Sequence id of the free_page report command, readonly by guest */ > > + __u32 free_page_cmd_id; > > + /* Guest-to-host command */ > > + __u32 guest2host_cmd; > > }; > > #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */
Wei Wang
2017-Nov-14 12:02 UTC
[PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 11/14/2017 01:32 AM, Michael S. Tsirkin wrote:>> - guest2host_cmd: written by the guest to ACK to the host about the >> commands that have been received. The host will clear the corresponding >> bits on the host2guest_cmd register. The guest also uses this register >> to send commands to the host (e.g. when finish free page reporting). > I am not sure what is the role of guest2host_cmd. Reporting of > the correct cmd id seems sufficient indication that guest > received the start command. Not getting any more seems sufficient > to detect stop. >I think the issue is when the host is waiting for the guest to report pages, it does not know whether the guest is going to report more or the report is done already. That's why we need a way to let the guest tell the host "the report is done, don't wait for more", then the host continues to the next step - sending the non-free pages to the destination. The following method is a conclusion of other comments, with some new thought. Please have a check if it is good. Two new configuration registers in total: - cmd_reg: the command register, combined from the previous host2guest and guest2host. I think we can use the same register for host requesting and guest ACKing, since the guest writing will trap to QEMU, that is, all the writes to the register are performed in QEMU, and we can keep things work in a correct way there. - cmd_id_reg: the sequence id of the free page report command. -- free page report: - host requests the guest to start reporting by "cmd_reg | REPORT_START"; - guest ACKs to the host about receiving the start reporting request by "cmd_reg | REPORT_START", host will clear the flag bit once receiving the ACK. - host requests the guest to stop reporting by "cmd_reg | REPORT_STOP"; - guest ACKs to the host about receiving the stop reporting request by "cmd_reg | REPORT_STOP", host will clear the flag once receiving the ACK. - guest tells the host about the start of the reporting by writing "cmd id" into an outbuf, which is added to the free page vq. - guest tells the host about the end of the reporting by writing "0" into an outbuf, which is added to the free page vq. (we reserve "id=0" as the stop sign) -- ballooning: - host requests the guest to start ballooning by "cmd_reg | BALLOONING"; - guest ACKs to the host about receiving the request by "cmd_reg | BALLOONING", host will clear the flag once receiving the ACK. Some more explanations: -- Why not let the host request the guest to start the free page reporting simply by writing a new cmd id to the cmd_id_reg? The configuration interrupt is shared among all the features - ballooning, free page reporting, and future feature extensions which need host-to-guest requests. Some features may need to add other feature specific configuration registers, like free page reporting need the cmd_id_reg, which is not used by ballooning. The rule here is that the feature specific registers are read only when that feature is requested via the cmd_reg. For example, the cmd_id_reg is read only when "cmd_reg | REPORT_START" is true. Otherwise, when the driver receives a configuration interrupt, it has to read both cmd_reg and cmd_id registers to know what are requested by the host - think about the case that ballooning requests are sent frequently while free page reporting isn't requested, the guest has to read the cmd_id register every time a ballooning request is sent by the host, which is not necessary. If future new features follow this style, there will be more unnecessary VMexits to read the unused feature specific registers. So I think it is good to have a central control of the feature request via only one cmd register - reading that one is enough to know what is requested by the host. Best, Wei
Wei Wang
2017-Nov-20 11:42 UTC
[PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
On 11/14/2017 01:32 AM, Michael S. Tsirkin wrote:> You should Cc Nitesh who is working on a related feature.OK, I'll do. We have two more issues which haven't been discussed yet, please have a check below.> > On Mon, Nov 13, 2017 at 06:34:48PM +0800, Wei Wang wrote: >> Ping for comments, thanks. >> >> On 11/03/2017 04:13 PM, Wei Wang wrote: >>> +static void virtballoon_cmd_report_free_page_start(struct virtio_balloon *vb) >>> +{ >>> + unsigned long flags; >>> + >>> + vb->report_free_page_stop = false; > this flag is used a lot outside any locks. Why is this safe? > Please add some comments explaining access to this flag.I will revert the logic as suggested: vb->report_free_page. Also plan to simplify its usage as below. The flag is set or cleared in the config handler according to the new_cmd_id given by the host: new_cmd_id=0: WRITE_ONCE(vb->report_free_page, false); // stop reporting new_cmd_id != old_cmd_id: WRITE_ONCE(vb->report_free_page, true); // start reporting The flag is read by virtio_balloon_send_free_pages() - the callback to report free pages: if (!READ_ONCE(vb->report_free_page)) return false; I don't find where it could be unsafe then (the flag is written by the config handler only).> >>> +} >>> + >>> static inline s64 towards_target(struct virtio_balloon *vb) >>> { >>> s64 target; >>> @@ -597,42 +673,147 @@ static void update_balloon_size_func(struct work_struct *work) >>> queue_work(system_freezable_wq, work); >>> } >>> -static int init_vqs(struct virtio_balloon *vb) >>> +static bool virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, >>> + unsigned long nr_pages) >>> { >>> - struct virtqueue *vqs[3]; >>> - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; >>> - static const char * const names[] = { "inflate", "deflate", "stats" }; >>> - int err, nvqs; >>> + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; >>> + void *addr = (void *)pfn_to_kaddr(pfn); > How do we know all free pages have a kaddr?For x86_64, it works well since the kernel has all the physical memory mapped already. But for 32-bit kernel, yes, the high memory usually isn't mapped and thus no kaddr. Essentially, this pfn_to_kaddr convert isn't necessary, we do it here because the current API that virtio has is based on "struct scatterlist", which takes a kaddr, and this kaddr is then convert back to physical address in virtqueue_add() when assigning to desc->addr. I think a better solution would be to add a new API, which directly assigns the caller's guest physical address to desc->addr, similar to the previous implementation "add_one_chunk()" (https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg02452.html). But we can change that to a general virtio API: virtqueue_add_one_desc(struct virtqueue *_vq, u64 base_addr, u32 size, bool in_desc, void *data); What do you think? Best, Wei
Possibly Parallel Threads
- [PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
- [PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
- [PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
- [PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ
- [PATCH v17 6/6] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_VQ