Matthew Fioravante
2012-Sep-17 21:55 UTC
[PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
This patch adds posix io support (read,write,lseek) to block devices using blkfront Signed off by Matthew Fioravante matthew.fioravante@jhuapl.edu diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c --- a/extras/mini-os/blkfront.c +++ b/extras/mini-os/blkfront.c @@ -392,6 +392,7 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret) { aiocbp->data = (void*) 1; + aiocbp->aio_cb = NULL; } void blkfront_io(struct blkfront_aiocb *aiocbp, int write) @@ -547,9 +548,150 @@ moretodo: #ifdef HAVE_LIBC int blkfront_open(struct blkfront_dev *dev) { + /* Silently prevent multiple opens */ + if(dev->fd != -1) { + return dev->fd; + } dev->fd = alloc_fd(FTYPE_BLK); printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd); files[dev->fd].blk.dev = dev; + files[dev->fd].blk.offset = 0; return dev->fd; } + +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + off_t offset = files[fd].blk.offset; + struct blkfront_aiocb aiocb; + unsigned long long disksize = dev->info.sectors * dev->info.sector_size; + unsigned int blocksize = dev->info.sector_size; + + int blknum; + int blkoff; + size_t bytes; + int rc = 0; + int alignedbuf = 0; + uint8_t* copybuf = NULL; + + /* RW 0 bytes is just a NOP */ + if(count == 0) { + return 0; + } + /* Check for NULL buffer */ + if( buf == NULL ) { + errno = EFAULT; + return -1; + } + + /* Write mode checks */ + if(write) { + /*Make sure we have write permission */ + if(dev->info.info & VDISK_READONLY || (dev->info.mode != O_RDWR && dev->info.mode != O_WRONLY)) { + errno = EACCES; + return -1; + } + /*Make sure disk is big enough for this write */ + if(offset + count > disksize) { + errno = ENOSPC; + return -1; + } + } + /* Read mode checks */ + else + { + /*If the requested read is bigger than the disk, just + * read as much as we can until the end */ + if(offset + count > disksize) { + count = offset >= disksize ? 0 : disksize - offset; + } + } + /* Determine which block to start at and which offset inside of it */ + blknum = offset / blocksize; + blkoff = offset % blocksize; + + /* Optimization: We need to check if buf is aligned to the sector size. + * This is somewhat tricky code. We have to add the blocksize - block offset + * because the first block may be a partial block and then for every subsequent + * block rw the buffer will be offset.*/ + if(!((uintptr_t) (buf +(blocksize - blkoff)) & (dev->info.sector_size-1))) { + alignedbuf = 1; + } + + /* Setup aiocb block object */ + aiocb.aio_dev = dev; + aiocb.aio_nbytes = blocksize; + aiocb.aio_offset = blknum * blocksize; + aiocb.aio_cb = NULL; + aiocb.data = NULL; + + /* If our buffer is unaligned or its aligned but we will need to rw a partial block + * then a copy will have to be done */ + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) { + copybuf = _xmalloc(blocksize, dev->info.sector_size); + } + + rc = count; + while(count > 0) { + /* determine how many bytes to read/write from/to the current block buffer */ + bytes = count > (blocksize - blkoff) ? blocksize - blkoff : count; + + /* read operation */ + if(!write) { + if (alignedbuf && bytes >= blocksize) { + /* If aligned and were reading a whole block, just read right into buf */ + aiocb.aio_buf = buf; + blkfront_read(&aiocb); + } else { + /* If not then we have to do a copy */ + aiocb.aio_buf = copybuf; + blkfront_read(&aiocb); + memcpy(buf, ©buf[blkoff], bytes); + } + } + /* Write operation */ + else { + if(alignedbuf && bytes >= blocksize) { + /* If aligned and were writing a whole block, just write directly from buf */ + aiocb.aio_buf = buf; + blkfront_write(&aiocb); + } else { + /* If not then we have to do a copy. */ + aiocb.aio_buf = copybuf; + /* If we''re writing a partial block, we need to read the current contents first + * so we don''t overwrite the extra bits with garbage */ + if(blkoff != 0 || bytes < blocksize) { + blkfront_read(&aiocb); + } + memcpy(©buf[blkoff], buf, bytes); + blkfront_write(&aiocb); + } + } + /* Will start at beginning of all remaining blocks */ + blkoff = 0; + + /* Increment counters and continue */ + count -= bytes; + buf += bytes; + aiocb.aio_offset += blocksize; + } + + free(copybuf); + files[fd].blk.offset += rc; + return rc; + +} + +int blkfront_posix_fstat(int fd, struct stat* buf) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + + buf->st_mode = dev->info.mode; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = dev->info.sectors * dev->info.sector_size; + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + + return 0; +} #endif diff --git a/extras/mini-os/include/blkfront.h b/extras/mini-os/include/blkfront.h --- a/extras/mini-os/include/blkfront.h +++ b/extras/mini-os/include/blkfront.h @@ -28,7 +28,17 @@ struct blkfront_info }; struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info); #ifdef HAVE_LIBC +#include <sys/stat.h> +/* POSIX IO functions: + * use blkfront_open() to get a file descriptor to the block device + * Don''t use the other blkfront posix functions here directly, instead use + * read(), write(), lseek() and fstat() on the file descriptor + */ int blkfront_open(struct blkfront_dev *dev); +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write); +#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 1) +#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 0) +int blkfront_posix_fstat(int fd, struct stat* buf); #endif void blkfront_aio(struct blkfront_aiocb *aiocbp, int write); #define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0) diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h +++ b/extras/mini-os/include/lib.h @@ -174,6 +174,7 @@ extern struct file { } tap; struct { struct blkfront_dev *dev; + off_t offset; } blk; struct { struct kbdfront_dev *dev; diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c --- a/extras/mini-os/lib/sys.c +++ b/extras/mini-os/lib/sys.c @@ -289,6 +289,11 @@ int read(int fd, void *buf, size_t nbytes) return ret * sizeof(union xenfb_in_event); } #endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: { + return blkfront_posix_read(fd, buf, nbytes); + } +#endif default: break; } @@ -321,6 +326,10 @@ int write(int fd, const void *buf, size_t nbytes) netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes); return nbytes; #endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_write(fd, buf, nbytes); +#endif default: break; } @@ -331,8 +340,37 @@ int write(int fd, const void *buf, size_t nbytes) off_t lseek(int fd, off_t offset, int whence) { - errno = ESPIPE; - return (off_t) -1; + switch(files[fd].type) { +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + switch (whence) { + case SEEK_SET: + files[fd].file.offset = offset; + break; + case SEEK_CUR: + files[fd].file.offset += offset; + break; + case SEEK_END: + { + struct stat st; + int ret; + ret = fstat(fd, &st); + if (ret) + return -1; + files[fd].file.offset = st.st_size + offset; + break; + } + default: + errno = EINVAL; + return -1; + } + return files[fd].file.offset; + break; +#endif + default: /* Not implemented on this FTYPE */ + errno = ESPIPE; + return (off_t) -1; + } } int fsync(int fd) { @@ -445,6 +483,10 @@ int fstat(int fd, struct stat *buf) buf->st_ctime = time(NULL); return 0; } +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_fstat(fd, buf); +#endif default: break; } _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Samuel Thibault
2012-Sep-17 22:46 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
Matthew Fioravante, le Mon 17 Sep 2012 17:55:43 -0400, a écrit :> + /* Read mode checks */ > + else > + { > + /*If the requested read is bigger than the disk, just > + * read as much as we can until the end */ > + if(offset + count > disksize) { > + count = offset >= disksize ? 0 : disksize - offset; > + } > + }Perhaps return 0 here already instead of just setting count to 0?> + > + /* Setup aiocb block object */ > + aiocb.aio_dev = dev; > + aiocb.aio_nbytes = blocksize; > + aiocb.aio_offset = blknum * blocksize; > + aiocb.aio_cb = NULL; > + aiocb.data = NULL; > + > + /* If our buffer is unaligned or its aligned but we will need to rw > a partial block > + * then a copy will have to be done */ > + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) { > + copybuf = _xmalloc(blocksize, dev->info.sector_size); > + } > + > + rc = count; > + while(count > 0) { > + /* determine how many bytes to read/write from/to the current > block buffer */ > + bytes = count > (blocksize - blkoff) ? blocksize - blkoff : count;Mmm. Optimizing the non-aligned case would make the code tricky, but shouldn''t optimizing the aligned case at least a little bit not too hard? i.e. set bytes to max(count, BLKIF_MAX_SEGMENTS_PER_REQUEST*PAGE_SIZE); in the optimized case? That''d get much better 44KiB transfers instead of one sector at a time. Ideally we should even push a series of aio requests, but that''s a lot less easy since you need to know how many requests you can afford. Apart from that, Acked-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Ian Campbell
2012-Sep-18 07:27 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
> diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h > --- a/extras/mini-os/include/lib.h > +++ b/extras/mini-os/include/lib.h > @@ -174,6 +174,7 @@ extern struct file { > } tap; > struct { > struct blkfront_dev *dev; > + off_t offset;In indentation here and in several other places in this patch is all over the place. Please try and stick to the prevailing style (tabs vs spaces, numbers of them, brace placement etc) in the surrounding code.> } blk; > struct { > struct kbdfront_dev *dev; > diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c > --- a/extras/mini-os/lib/sys.c > +++ b/extras/mini-os/lib/sys.c > @@ -289,6 +289,11 @@ int read(int fd, void *buf, size_t nbytes) > return ret * sizeof(union xenfb_in_event); > } > #endif > +#ifdef CONFIG_BLKFRONT > + case FTYPE_BLK: { > + return blkfront_posix_read(fd, buf, nbytes); > + } > +#endif > default: > break; > } > @@ -321,6 +326,10 @@ int write(int fd, const void *buf, size_t nbytes) > netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes); > return nbytes; > #endif > +#ifdef CONFIG_BLKFRONT > + case FTYPE_BLK: > + return blkfront_posix_write(fd, buf, nbytes); > +#endif > default: > break; > } > @@ -331,8 +340,37 @@ int write(int fd, const void *buf, size_t nbytes) > > off_t lseek(int fd, off_t offset, int whence) > { > - errno = ESPIPE; > - return (off_t) -1; > + switch(files[fd].type) { > +#ifdef CONFIG_BLKFRONTThe whitespace in the rest of this hunk is particularly confused.> + case FTYPE_BLK: > + switch (whence) { > + case SEEK_SET: > + files[fd].file.offset = offset; > + break; > + case SEEK_CUR: > + files[fd].file.offset += offset; > + break; > + case SEEK_END: > + { > + struct stat st; > + int ret; > + ret = fstat(fd, &st); > + if (ret) > + return -1; > + files[fd].file.offset = st.st_size + offset; > + break; > + } > + default: > + errno = EINVAL; > + return -1; > + } > + return files[fd].file.offset; > + break; > +#endif > + default: /* Not implemented on this FTYPE */ > + errno = ESPIPE; > + return (off_t) -1; > + } > } > > int fsync(int fd) { > @@ -445,6 +483,10 @@ int fstat(int fd, struct stat *buf) > buf->st_ctime = time(NULL); > return 0; > } > +#ifdef CONFIG_BLKFRONT > + case FTYPE_BLK: > + return blkfront_posix_fstat(fd, buf); > +#endif > default: > break; > } > >
Matthew Fioravante
2012-Oct-05 18:02 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
On 09/17/2012 06:46 PM, Samuel Thibault wrote:> Matthew Fioravante, le Mon 17 Sep 2012 17:55:43 -0400, a écrit : >> + /* Read mode checks */ >> + else >> + { >> + /*If the requested read is bigger than the disk, just >> + * read as much as we can until the end */ >> + if(offset + count > disksize) { >> + count = offset >= disksize ? 0 : disksize - offset; >> + } >> + } > Perhaps return 0 here already instead of just setting count to 0?I agree> >> + >> + /* Setup aiocb block object */ >> + aiocb.aio_dev = dev; >> + aiocb.aio_nbytes = blocksize; >> + aiocb.aio_offset = blknum * blocksize; >> + aiocb.aio_cb = NULL; >> + aiocb.data = NULL; >> + >> + /* If our buffer is unaligned or its aligned but we will need to rw >> a partial block >> + * then a copy will have to be done */ >> + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) { >> + copybuf = _xmalloc(blocksize, dev->info.sector_size); >> + } >> + >> + rc = count; >> + while(count > 0) { >> + /* determine how many bytes to read/write from/to the current >> block buffer */ >> + bytes = count > (blocksize - blkoff) ? blocksize - blkoff : count; > Mmm. Optimizing the non-aligned case would make the code > tricky, but shouldn''t optimizing the aligned case at least > a little bit not too hard? i.e. set bytes to max(count, > BLKIF_MAX_SEGMENTS_PER_REQUEST*PAGE_SIZE); in the optimized case? That''d > get much better 44KiB transfers instead of one sector at a time. > > Ideally we should even push a series of aio requests, but that''s a lot > less easy since you need to know how many requests you can afford.See latest v2 patch> Apart from that, > > Acked-by: Samuel Thibault <samuel.thibault@ens-lyon.org>_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Samuel Thibault
2012-Oct-08 08:05 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
Matthew Fioravante, le Fri 05 Oct 2012 14:02:06 -0400, a écrit :> See latest v2 patchIt looks good, except why BLKIF_MAX_SEGMENTS_PER_REQUEST-1 and not BLKIF_MAX_SEGMENTS_PER_REQUEST? Samuel
Matthew Fioravante
2012-Oct-08 14:34 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
The math done by blkfront doesn''t always work if your use BLKIF_MAX_SEGMENTS_PER_REQUEST (11). Where you do: ASSERT(n <= BLKIF_MAX_SEGMENTS_PER_REQUEST); Sometimes n comes out to be BLKIF_MAX_SEGEMENTS_PER_REQUEST +1. This happens when your buffer address is sector aligned (512k), but not page aligned (4096k). So we can either optimize yet again if address is page aligned or use BLKIF_MAX_SEGMENTS_PER_REQUEST-1; Is the page math being done above that assertion correct? On 10/08/2012 04:05 AM, Samuel Thibault wrote:> Matthew Fioravante, le Fri 05 Oct 2012 14:02:06 -0400, a écrit : >> See latest v2 patch > It looks good, except why BLKIF_MAX_SEGMENTS_PER_REQUEST-1 and not > BLKIF_MAX_SEGMENTS_PER_REQUEST? > > Samuel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Samuel Thibault
2012-Oct-08 17:10 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
Matthew Fioravante, le Mon 08 Oct 2012 10:34:24 -0400, a écrit :> Sometimes n comes out to be BLKIF_MAX_SEGEMENTS_PER_REQUEST +1. > > This happens when your buffer address is sector aligned (512k), but > not page aligned (4096k). > So we can either optimize yet again if address is page > alignedAh, right. Well, I''d say it''s worth making the effort to test against that, i.e. (untested) else { if (count >= BLKIF_MAX_SEGMENTS_PER_REQUEST*PAGE_SIZE && (!(buf & ~(PAGE_SIZE-1)))) /* page-aligned */ bytes = BLKIF_MAX_SEGMENTS_PER_REQUEST*PAGE_SIZE; else if (count >= (BLKIF_MAX_SEGMENTS_PER_REQUEST-1)*PAGE_SIZE) /* only sector-aligned, will have to realign */ bytes = (BLKIF_MAX_SEGMENTS_PER_REQUEST-1)*PAGE_SIZE; else bytes = count & ~(blocksize - 1); } }> Is the page math being done above that assertion correct?I believe so. Samuel
Matthew Fioravante
2012-Oct-08 17:34 UTC
Re: [PATCH mini-os enhancements for vtpm 2/8] add posix io to blkfront
On 10/08/2012 01:10 PM, Samuel Thibault wrote:> Matthew Fioravante, le Mon 08 Oct 2012 10:34:24 -0400, a écrit : >> Sometimes n comes out to be BLKIF_MAX_SEGEMENTS_PER_REQUEST +1. >> >> This happens when your buffer address is sector aligned (512k), but >> not page aligned (4096k). >> So we can either optimize yet again if address is page >> aligned > Ah, right. Well, I''d say it''s worth making the effort to test against > that, i.e. (untested) > > else { > if (count >= BLKIF_MAX_SEGMENTS_PER_REQUEST*PAGE_SIZE > && (!(buf & ~(PAGE_SIZE-1)))) > /* page-aligned */ > bytes = BLKIF_MAX_SEGMENTS_PER_REQUEST*PAGE_SIZE; > else if (count >= (BLKIF_MAX_SEGMENTS_PER_REQUEST-1)*PAGE_SIZE) > /* only sector-aligned, will have to realign */ > bytes = (BLKIF_MAX_SEGMENTS_PER_REQUEST-1)*PAGE_SIZE; > else > bytes = count & ~(blocksize - 1); > } > }I tested and you can do BLKIF_MAX_SEGEMENTS_PER_REQUEST if the buffer is page aligned. I''m sending a new patch with this optimization.> >> Is the page math being done above that assertion correct? > I believe so. > > Samuel_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel