On Thu, Feb 21, 2019 at 06:14:02PM -0800, David Tolnay
wrote:> Add a config TCG_VIRTIO_VTPM which enables a driver providing the guest
> kernel side of TPM over virtio.
>
> Use case: TPM support is needed for performing trusted work from within
> a virtual machine launched by Chrome OS.
>
> Tested inside crosvm, the Chrome OS virtual machine monitor. Crosvm's
> implementation of the virtio TPM device can be found in these two source
> files:
>
> -
chromium.googlesource.com/chromiumos/platform/crosvm/+/18ce5713e6cb99c40aafec52b67c28ba12a44f31/devices/src/virtio/tpm.rs
> -
chromium.googlesource.com/chromiumos/platform/crosvm/+/18ce5713e6cb99c40aafec52b67c28ba12a44f31/tpm2/src/lib.rs
>
> and is currently backed by the libtpm2 TPM simulator:
>
> - chromium.googlesource.com/chromiumos/third_party/tpm2
>
> Reviewed-on: chromium-review.googlesource.com/1387655
> Reviewed-by: Andrey Pronin <apronin at chromium.org>
> Tested-by: David Tolnay <dtolnay at gmail.com>
> Signed-off-by: David Tolnay <dtolnay at gmail.com>
> ---
> UNRESOLVED:
> The driver assumes virtio device number VIRTIO_ID_TPM=31. If there is
> interest in accepting a virtio TPM driver into the kernel, the Chrome OS
> team will coordinate with the OASIS virtio technical committee to secure
> an agreed upon device number and include it in a later patchset.
I am not a tpm expert but I don't see why not.
> drivers/char/tpm/Kconfig | 9 +
> drivers/char/tpm/Makefile | 1 +
> drivers/char/tpm/tpm_virtio.c | 460 ++++++++++++++++++++++++++++++++++
Maintainer entry?
> 3 files changed, 470 insertions(+)
> create mode 100644 drivers/char/tpm/tpm_virtio.c
>
> diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
> index 536e55d3919f..8997060e248e 100644
> --- a/drivers/char/tpm/Kconfig
> +++ b/drivers/char/tpm/Kconfig
> @@ -164,6 +164,15 @@ config TCG_VTPM_PROXY
> /dev/vtpmX and a server-side file descriptor on which the vTPM
> can receive commands.
>
> +config TCG_VIRTIO_VTPM
> + tristate "Virtio vTPM"
> + depends on TCG_TPM
> + help
> + This driver provides the guest kernel side of TPM over Virtio. If
> + you are building Linux to run inside of a hypervisor that supports
> + TPM over Virtio, say Yes and the virtualized TPM will be
> + accessible from the guest.
> +
>
> source "drivers/char/tpm/st33zp24/Kconfig"
> endif # TCG_TPM
> diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
> index a01c4cab902a..4f5d1071257a 100644
> --- a/drivers/char/tpm/Makefile
> +++ b/drivers/char/tpm/Makefile
> @@ -33,3 +33,4 @@ obj-$(CONFIG_TCG_TIS_ST33ZP24) += st33zp24/
> obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o
> obj-$(CONFIG_TCG_CRB) += tpm_crb.o
> obj-$(CONFIG_TCG_VTPM_PROXY) += tpm_vtpm_proxy.o
> +obj-$(CONFIG_TCG_VIRTIO_VTPM) += tpm_virtio.o
> diff --git a/drivers/char/tpm/tpm_virtio.c b/drivers/char/tpm/tpm_virtio.c
> new file mode 100644
> index 000000000000..f3239d983f18
> --- /dev/null
> +++ b/drivers/char/tpm/tpm_virtio.c
> @@ -0,0 +1,460 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2019 Google Inc.
> + *
> + * Author: David Tolnay <dtolnay at gmail.com>
> + *
> + * ---
> + *
> + * Device driver for TPM over virtio.
> + *
> + * This driver employs a single virtio queue to handle both send and recv.
TPM
> + * commands are sent over virtio to the hypervisor during a TPM send
operation
> + * and responses are received over the same queue during a recv operation.
> + *
> + * The driver contains a single buffer that is the only buffer we ever
place on
> + * the virtio queue. Commands are copied from the caller's command
buffer into
> + * the driver's buffer before handing off to virtio, and responses are
received
> + * into the driver's buffer then copied into the caller's response
buffer. This
> + * allows us to be resilient to timeouts. When a send or recv operation
times
> + * out, the caller is free to destroy their buffer; we don't want the
hypervisor
> + * continuing to perform reads or writes against that destroyed buffer.
> + *
> + * This driver does not support concurrent send and recv operations.
Mutual
> + * exclusion is upheld by the tpm_mutex lock held in tpm-interface.c
around the
> + * calls to chip->ops->send and chip->ops->recv.
> + *
> + * The intended hypervisor-side implementation is as follows.
> + *
> + * while true:
> + * await next virtio buffer.
> + * expect first descriptor in chain to be guest-to-host.
> + * read tpm command from that buffer.
> + * synchronously perform TPM work determined by the command.
> + * expect second descriptor in chain to be host-to-guest.
> + * write TPM response into that buffer.
> + * place buffer on virtio used queue indicating how many bytes
written.
That's fine I think except generally it should be legal for guest
to split each buffer to several segments.
> + */
> +
> +#include <linux/virtio_config.h>
> +
> +#include "tpm.h"
> +
> +/*
> + * Timeout duration when waiting on the hypervisor to complete its end of
the
> + * TPM operation. This timeout is relatively high because certain TPM
operations
> + * can take dozens of seconds.
> + */
> +#define TPM_VIRTIO_TIMEOUT (120 * HZ)
Should we read this from device? E.g. a busy hypervisor might
take longer to respond ...
> +
> +struct vtpm_device {
> + /*
> + * Data structure for integration with the common code of the TPM driver
> + * in tpm-chip.c.
> + */
> + struct tpm_chip *chip;
> +
> + /*
> + * Virtio queue for sending TPM commands out of the virtual machine and
> + * receiving TPM responses back from the hypervisor.
> + */
> + struct virtqueue *vq;
> +
> + /*
> + * Completion that is notified when a virtio operation has been
> + * fulfilled by the hypervisor.
> + */
> + struct completion complete;
> +
> + /*
> + * Whether driver currently holds ownership of the virtqueue buffer.
> + * When false, the hypervisor is in the process of reading or writing
> + * the buffer and the driver must not touch it.
> + */
> + bool driver_has_buffer;
> +
> + /*
> + * Whether during the most recent TPM operation, a virtqueue_kick failed
> + * or a wait timed out.
> + *
> + * The next send or recv operation will attempt a kick upon seeing this
> + * status. That should clear up the queue in the case that the
> + * hypervisor became temporarily nonresponsive, such as by resource
> + * exhaustion on the host. The extra kick enables recovery from kicks
> + * going unnoticed by the hypervisor as well as recovery from virtio
> + * callbacks going unnoticed by the guest kernel.
Well not necessarily. E.g. virtqueue_kick does not
kick if hypervisor didn't enable notifications
(e.g. with event idx they get disabled automatically).
So it won't recover if hypervisor can discard
kicks.
I think this comment needs clarification.
> + */
> + bool needs_kick;
> +
> + /* Number of bytes available to read from the virtqueue buffer. */
> + unsigned int readable;
> +
> + /*
> + * Buffer in which all virtio transfers take place. Buffer size is the
> + * maximum legal TPM command or response message size.
> + */
> + u8 virtqueue_buffer[TPM_BUFSIZE];
> +};
> +
> +/*
> + * Wait for ownership of the virtqueue buffer.
> + *
> + * The why-string should begin with "waiting to..." or
"waiting for..." with no
> + * trailing newline. It will appear in log output.
> + *
> + * Returns zero for success, otherwise negative error.
> + */
> +static int vtpm_wait_for_buffer(struct vtpm_device *dev, const char *why)
> +{
> + int ret;
> + bool did_kick;
> + struct tpm_chip *chip = dev->chip;
> + unsigned long deadline = jiffies + TPM_VIRTIO_TIMEOUT;
> +
> + /* Kick queue if needed. */
> + if (dev->needs_kick) {
> + did_kick = virtqueue_kick(dev->vq);
> + if (!did_kick) {
> + dev_notice(&chip->dev, "kick failed; will retry\n");
> + return -EBUSY;
> + }
> + dev->needs_kick = false;
> + }
> +
> + while (!dev->driver_has_buffer) {
> + unsigned long now = jiffies;
> +
> + /* Check timeout, otherwise `deadline - now` may underflow. */
> + if time_after_eq(now, deadline) {
> + dev_warn(&chip->dev, "timed out %s\n", why);
> + dev->needs_kick = true;
> + return -ETIMEDOUT;
> + }
> +
> + /*
> + * Wait to be signaled by virtio callback.
> + *
> + * Positive ret is jiffies remaining until timeout when the
> + * completion occurred, which means successful completion. Zero
> + * ret is timeout. Negative ret is error.
> + */
> + ret = wait_for_completion_killable_timeout(
> + &dev->complete, deadline - now);
> +
> + /* Log if completion did not occur. */
> + if (ret == -ERESTARTSYS) {
> + /* Not a warning if it was simply interrupted. */
> + dev_dbg(&chip->dev, "interrupted %s\n", why);
> + } else if (ret == 0) {
> + dev_warn(&chip->dev, "timed out %s\n", why);
> + ret = -ETIMEDOUT;
Should we check NEEDS_RESET bit and try to reset the device?
Or is that too drastic?
> + } else if (ret < 0) {
> + dev_warn(&chip->dev, "failed while %s: error %d\n",
> + why, -ret);
> + }
> +
> + /*
> + * Return error if completion did not occur. Schedule kick to be
> + * retried at the start of the next send/recv to help unblock
> + * the queue.
> + */
> + if (ret < 0) {
> + dev->needs_kick = true;
> + return ret;
> + }
> +
> + /* Completion occurred. Expect response buffer back. */
> + if (virtqueue_get_buf(dev->vq, &dev->readable)) {
> + dev->driver_has_buffer = true;
> +
> + if (dev->readable > TPM_BUFSIZE) {
> + dev_crit(&chip->dev,
> + "hypervisor bug: response exceeds max size, %u > %u\n",
> + dev->readable,
> + (unsigned int) TPM_BUFSIZE);
> + dev->readable = TPM_BUFSIZE;
> + return -EPROTO;
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int vtpm_op_send(struct tpm_chip *chip, u8 *caller_buf, size_t len)
> +{
> + int ret;
> + bool did_kick;
> + struct scatterlist sg_outbuf, sg_inbuf;
> + struct scatterlist *sgs[2] = { &sg_outbuf, &sg_inbuf };
> + struct vtpm_device *dev = dev_get_drvdata(&chip->dev);
> + u8 *virtqueue_buf = dev->virtqueue_buffer;
> +
> + dev_dbg(&chip->dev, __func__ " %zu bytes\n", len);
> +
> + if (len > TPM_BUFSIZE) {
> + dev_err(&chip->dev,
> + "command is too long, %zu > %zu\n",
> + len, (size_t) TPM_BUFSIZE);
> + return -EINVAL;
> + }
> +
> + /*
> + * Wait until hypervisor relinquishes ownership of the virtqueue buffer.
> + *
> + * This may block if the previous recv operation timed out in the guest
> + * kernel but is still being processed by the hypervisor. Also may block
> + * if send operations are performed back-to-back, such as if something
> + * in the caller failed in between a send and recv.
> + *
> + * During normal operation absent of any errors or timeouts, this does
> + * not block.
> + */
> + ret = vtpm_wait_for_buffer(dev, "waiting to begin send");
> + if (ret)
> + return ret;
> +
> + /* Driver owns virtqueue buffer and may now write into it. */
> + memcpy(virtqueue_buf, caller_buf, len);
> +
> + /*
> + * Enqueue the virtqueue buffer once as outgoing virtio data (written by
> + * the virtual machine and read by the hypervisor) and again as incoming
> + * data (written by the hypervisor and read by the virtual machine).
> + * This step moves ownership of the virtqueue buffer from driver to
> + * hypervisor.
> + *
> + * Note that we don't know here how big of a buffer the caller will
use
> + * with their later call to recv. We allow the hypervisor to write up to
> + * the TPM max message size. If the caller ends up using a smaller
> + * buffer with recv that is too small to hold the entire response, the
> + * recv will return an error. This conceivably breaks TPM
> + * implementations that want to produce a different verbosity of
> + * response depending on the receiver's buffer size.
> + */
> + sg_init_one(&sg_outbuf, virtqueue_buf, len);
> + sg_init_one(&sg_inbuf, virtqueue_buf, TPM_BUFSIZE);
> + ret = virtqueue_add_sgs(dev->vq, sgs, 1, 1, virtqueue_buf,
GFP_KERNEL);
> + if (ret) {
> + dev_err(&chip->dev, "failed virtqueue_add_sgs\n");
> + return ret;
> + }
> +
> + /* Kick the other end of the virtqueue after having added a buffer. */
> + did_kick = virtqueue_kick(dev->vq);
> + if (!did_kick) {
> + dev->needs_kick = true;
> + dev_notice(&chip->dev, "kick failed; will retry\n");
> +
> + /*
> + * We return 0 anyway because what the caller doesn't know can't
> + * hurt them. They can call recv and it will retry the kick. If
> + * that works, everything is fine.
> + *
> + * If the retry in recv fails too, they will get -EBUSY from
> + * recv.
> + */
> + }
> +
> + /*
> + * Hypervisor is now processing the TPM command asynchronously. It will
> + * read the command from the output buffer and write the response into
> + * the input buffer (which are the same buffer). When done, it will send
> + * back the buffers over virtio and the driver's virtio callback will
> + * complete dev->complete so that we know the response is ready to be
> + * read.
> + *
> + * It is important to have copied data out of the caller's buffer
into
> + * the driver's virtqueue buffer because the caller is free to
destroy
> + * their buffer when this call returns. We can't avoid copying by
> + * waiting here for the hypervisor to finish reading, because if that
> + * wait times out, we return and the caller may destroy their buffer
> + * while the hypervisor is continuing to read from it.
> + */
> + dev->driver_has_buffer = false;
> + return 0;
> +}
> +
> +static int vtpm_op_recv(struct tpm_chip *chip, u8 *caller_buf, size_t len)
> +{
> + int ret;
> + struct vtpm_device *dev = dev_get_drvdata(&chip->dev);
> + u8 *virtqueue_buf = dev->virtqueue_buffer;
> +
> + dev_dbg(&chip->dev, __func__ "\n");
> +
> + /*
> + * Wait until the virtqueue buffer is owned by the driver.
> + *
> + * This will usually block while the hypervisor finishes processing the
> + * most recent TPM command.
> + */
> + ret = vtpm_wait_for_buffer(dev, "waiting for TPM response");
> + if (ret)
> + return ret;
> +
> + dev_dbg(&chip->dev, "received %u bytes\n",
dev->readable);
> +
> + if (dev->readable > len) {
> + dev_notice(&chip->dev,
> + "TPM response is bigger than receiver's buffer: %u >
%zu\n",
> + dev->readable, len);
> + return -EINVAL;
> + }
> +
> + /* Copy response over to the caller. */
> + memcpy(caller_buf, virtqueue_buf, dev->readable);
> +
> + return dev->readable;
> +}
> +
> +static void vtpm_op_cancel(struct tpm_chip *chip)
> +{
> + /*
> + * Cancel is not called in this driver's use of tpm-interface.c. It
may
> + * be triggered through tpm-sysfs but that can be implemented as needed.
> + * Be aware that tpm-sysfs performs cancellation without holding the
> + * tpm_mutex that protects our send and recv operations, so a future
> + * implementation will need to consider thread safety of concurrent
> + * send/recv and cancel.
> + */
> + dev_notice(&chip->dev, "cancellation is not
implemented\n");
> +}
> +
> +static u8 vtpm_op_status(struct tpm_chip *chip)
> +{
> + /*
> + * Status is for TPM drivers that want tpm-interface.c to poll for
> + * completion before calling recv. Usually this is when the hardware
> + * needs to be polled i.e. there is no other way for recv to block on
> + * the TPM command completion.
> + *
> + * Polling goes until `(status & complete_mask) == complete_val`.
This
> + * driver defines both complete_mask and complete_val as 0 and blocks on
> + * our own completion object in recv instead.
> + */
> + return 0;
> +}
> +
> +static const struct tpm_class_ops vtpm_ops = {
> + .flags = TPM_OPS_AUTO_STARTUP,
> + .send = vtpm_op_send,
> + .recv = vtpm_op_recv,
> + .cancel = vtpm_op_cancel,
> + .status = vtpm_op_status,
> + .req_complete_mask = 0,
> + .req_complete_val = 0,
> +};
> +
> +static void vtpm_virtio_complete(struct virtqueue *vq)
> +{
> + struct virtio_device *vdev = vq->vdev;
> + struct vtpm_device *dev = vdev->priv;
> +
> + complete(&dev->complete);
> +}
> +
> +static int vtpm_probe(struct virtio_device *vdev)
> +{
> + int err;
> + struct vtpm_device *dev;
> + struct virtqueue *vq;
> + struct tpm_chip *chip;
> +
> + dev_dbg(&vdev->dev, __func__ "\n");
> +
> + dev = kzalloc(sizeof(struct vtpm_device), GFP_KERNEL);
> + if (!dev) {
> + err = -ENOMEM;
> + dev_err(&vdev->dev, "failed kzalloc\n");
> + goto err_dev_alloc;
> + }
> + vdev->priv = dev;
> +
> + vq = virtio_find_single_vq(vdev, vtpm_virtio_complete, "vtpm");
> + if (IS_ERR(vq)) {
> + err = PTR_ERR(vq);
> + dev_err(&vdev->dev, "failed virtio_find_single_vq\n");
> + goto err_virtio_find;
> + }
> + dev->vq = vq;
> +
> + chip = tpm_chip_alloc(&vdev->dev, &vtpm_ops);
> + if (IS_ERR(chip)) {
> + err = PTR_ERR(chip);
> + dev_err(&vdev->dev, "failed tpm_chip_alloc\n");
> + goto err_chip_alloc;
> + }
> + dev_set_drvdata(&chip->dev, dev);
> + chip->flags |= TPM_CHIP_FLAG_TPM2;
> + dev->chip = chip;
> +
> + init_completion(&dev->complete);
> + dev->driver_has_buffer = true;
> + dev->needs_kick = false;
> + dev->readable = 0;
> +
> + /*
> + * Required in order to enable vq use in probe function for auto
> + * startup.
> + */
> + virtio_device_ready(vdev);
> +
> + err = tpm_chip_register(dev->chip);
> + if (err) {
> + dev_err(&vdev->dev, "failed tpm_chip_register\n");
> + goto err_chip_register;
> + }
> +
> + return 0;
> +
> +err_chip_register:
> + put_device(&dev->chip->dev);
> +err_chip_alloc:
> + vdev->config->del_vqs(vdev);
> +err_virtio_find:
> + kfree(dev);
> +err_dev_alloc:
> + return err;
> +}
> +
> +static void vtpm_remove(struct virtio_device *vdev)
> +{
> + struct vtpm_device *dev = vdev->priv;
> +
> + /* Undo tpm_chip_register. */
> + tpm_chip_unregister(dev->chip);
> +
> + /* Undo tpm_chip_alloc. */
> + put_device(&dev->chip->dev);
> +
> + vdev->config->reset(vdev);
> + vdev->config->del_vqs(vdev);
> +
> + kfree(dev);
> +}
> +
> +#define VIRTIO_ID_TPM 31
> +
> +static struct virtio_device_id id_table[] = {
> + {
> + .device = VIRTIO_ID_TPM,
> + .vendor = VIRTIO_DEV_ANY_ID,
> + },
> + {},
> +};
Let's write
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_TPM, VIRTIO_DEV_ANY_ID },
{ 0 },
};
for consistency with other virtio devices.
> +
> +static struct virtio_driver vtpm_driver = {
> + .driver.name = KBUILD_MODNAME,
> + .driver.owner = THIS_MODULE,
> + .id_table = id_table,
> + .probe = vtpm_probe,
> + .remove = vtpm_remove,
Freeze/restore?
> +};
> +
> +module_virtio_driver(vtpm_driver);
> +
> +MODULE_AUTHOR("David Tolnay (dtolnay at gmail.com)");
> +MODULE_DESCRIPTION("Virtio vTPM Driver");
> +MODULE_VERSION("1.0");
> +MODULE_LICENSE("GPL");
> --
> 2.20.1