Isaku Yamahata
2007-Aug-28 06:17 UTC
[Xen-devel] [PATCH 1/3] xencomm take 4: xen side fix invalid or racy access
# HG changeset patch # User yamahata@valinux.co.jp # Date 1188279813 -32400 # Node ID fa5615e8bf84df1d5b4f9d87e95359692a65dbcb # Parent 1e5af5e99b791c682a284c208873ecf40ec8fc19 [xen, xencomm] fix various xencomm invalid racy access. - Xencomm should check struct xencomm_desc alignment. - Xencomm should check whether struct xencomm_desc itself (8 bytes) doesn''t cross page boundary. Otherwise a hostile guest kernel can pass such a pointer that may across page boundary. Then xencomm accesses a unrelated page. - Xencomm shouldn''t access struct xencomm_desc::nr_addrs multiple times. Copy it to local area and use the copy. Otherwise a hostile guest can modify at the same time. - Xencomm should check whether struct xencomm_desc::address[] array crosses page boundary. Otherwise xencomm may access unrelated pages. - Xencomm should get_page()/put_page() after address conversion from paddr to maddr because xen supports SMP and balloon driver. Otherwise another vcpu may free the page at the same time. Such a domain behaviour doesn''t make sense, however nothing prevents it. PATCHNAME: fix_xencomm_invalid_racy_access Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp> diff -r 1e5af5e99b79 -r fa5615e8bf84 xen/common/xencomm.c --- a/xen/common/xencomm.c Tue Aug 28 14:51:20 2007 +0900 +++ b/xen/common/xencomm.c Tue Aug 28 14:43:33 2007 +0900 @@ -34,9 +34,154 @@ #endif static void* -xencomm_maddr_to_vaddr(unsigned long maddr) -{ - return maddr ? maddr_to_virt(maddr) : NULL; +xencomm_vaddr(unsigned long paddr, struct page_info *page) +{ + return (void*)((paddr & ~PAGE_MASK) | (unsigned long)page_to_virt(page)); +} + +/* get_page() to prevent from another vcpu freeing the page */ +static int +xencomm_get_page(unsigned long paddr, struct page_info **page) +{ + unsigned long maddr = paddr_to_maddr(paddr); + if ( maddr == 0 ) + return -EFAULT; + + *page = maddr_to_page(maddr); + if ( get_page(*page, current->domain) == 0 ) + { + if ( page_get_owner(*page) != current->domain ) + { + /* + * This page might be a page granted by another domain, or + * this page is freed with decrease reservation hypercall at + * the same time. + */ + gdprintk(XENLOG_WARNING, + "bad page is passed. paddr 0x%lx maddr 0x%lx\n", + paddr, maddr); + return -EFAULT; + } + + /* Try again. */ + cpu_relax(); + return -EAGAIN; + } + + return 0; +} + +/* check if struct desc doesn''t cross page boundry */ +static int +xencomm_desc_cross_page_boundary(unsigned long paddr) +{ + unsigned long offset = paddr & ~PAGE_MASK; + if ( offset > PAGE_SIZE - sizeof(struct xencomm_desc) ) + return 1; + return 0; +} + +struct xencomm_ctxt { + struct xencomm_desc *desc; + + uint32_t nr_addrs; + struct page_info *page; +}; + +static uint32_t +xencomm_ctxt_nr_addrs(const struct xencomm_ctxt *ctxt) +{ + return ctxt->nr_addrs; +} + +static unsigned long* +xencomm_ctxt_address(struct xencomm_ctxt *ctxt, int i) +{ + return &ctxt->desc->address[i]; +} + +/* check if struct xencomm_desc and address array cross page boundary */ +static int +xencomm_ctxt_cross_page_boundary(struct xencomm_ctxt *ctxt) +{ + unsigned long saddr = (unsigned long)ctxt->desc; + unsigned long eaddr + (unsigned long)&ctxt->desc->address[ctxt->nr_addrs] - 1; + if ( (saddr >> PAGE_SHIFT) != (eaddr >> PAGE_SHIFT) ) + return 1; + return 0; +} + +static int +xencomm_ctxt_init(const void* handle, struct xencomm_ctxt *ctxt) +{ + struct page_info *page; + struct xencomm_desc *desc; + int ret; + + /* avoid unaligned access */ + if ( (unsigned long)handle % __alignof__(*desc) != 0 ) + return -EINVAL; + if ( xencomm_desc_cross_page_boundary((unsigned long)handle) ) + return -EINVAL; + + /* first we need to access the descriptor */ + ret = xencomm_get_page((unsigned long)handle, &page); + if ( ret ) + return ret; + + desc = xencomm_vaddr((unsigned long)handle, page); + if ( desc->magic != XENCOMM_MAGIC ) + { + printk("%s: error: %p magic was 0x%x\n", __func__, desc, desc->magic); + put_page(page); + return -EINVAL; + } + + ctxt->desc = desc; + ctxt->nr_addrs = desc->nr_addrs; /* copy before use. + * It is possible for a guest domain to + * modify concurrently. + */ + ctxt->page = page; + if ( xencomm_ctxt_cross_page_boundary(ctxt) ) + { + put_page(page); + return -EINVAL; + } + return 0; +} + +static void +xencomm_ctxt_done(struct xencomm_ctxt *ctxt) +{ + put_page(ctxt->page); +} + +static int +xencomm_copy_chunk_from( + unsigned long to, unsigned long paddr, unsigned int len) +{ + struct page_info *page; + + while (1) + { + int res; + res = xencomm_get_page(paddr, &page); + if ( res != 0 ) + { + if ( res == -EAGAIN ) + continue; /* Try again. */ + return res; + } + xc_dprintk("%lx[%d] -> %lx\n", + (unsigned long)xencomm_vaddr(paddr, page), len, to); + + memcpy((void *)to, xencomm_vaddr(paddr, page), len); + put_page(page); + return 0; + } + /* NOTREACHED */ } static unsigned long @@ -48,14 +193,12 @@ xencomm_inline_from_guest( while ( n > 0 ) { unsigned int chunksz, bytes; - unsigned long src_maddr; chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE); bytes = min(chunksz, n); - src_maddr = paddr_to_maddr(src_paddr); - xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to); - memcpy(to, maddr_to_virt(src_maddr), bytes); + if ( xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes) ) + return n; src_paddr += bytes; to += bytes; n -= bytes; @@ -81,7 +224,7 @@ xencomm_copy_from_guest( xencomm_copy_from_guest( void *to, const void *from, unsigned int n, unsigned int skip) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; unsigned int from_pos = 0; unsigned int to_pos = 0; unsigned int i = 0; @@ -89,26 +232,14 @@ xencomm_copy_from_guest( if ( xencomm_is_inline(from) ) return xencomm_inline_from_guest(to, from, n, skip); - /* First we need to access the descriptor. */ - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)from)); - if ( desc == NULL ) + if ( xencomm_ctxt_init(from, &ctxt) ) return n; - if ( desc->magic != XENCOMM_MAGIC ) - { - printk("%s: error: %p magic was 0x%x\n", - __func__, desc, desc->magic); - return n; - } - - /* Iterate through the descriptor, copying up to a page at a time. */ - while ( (to_pos < n) && (i < desc->nr_addrs) ) - { - unsigned long src_paddr = desc->address[i]; - unsigned int pgoffset; - unsigned int chunksz; - unsigned int chunk_skip; + /* Iterate through the descriptor, copying up to a page at a time */ + while ( (to_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt)) ) + { + unsigned long src_paddr = *xencomm_ctxt_address(&ctxt, i); + unsigned int pgoffset, chunksz, chunk_skip; if ( src_paddr == XENCOMM_INVALID ) { @@ -124,18 +255,13 @@ xencomm_copy_from_guest( chunksz -= chunk_skip; skip -= chunk_skip; - if ( (skip == 0) && (chunksz > 0) ) - { - unsigned long src_maddr; - unsigned long dest = (unsigned long)to + to_pos; + if ( skip == 0 && chunksz > 0 ) + { unsigned int bytes = min(chunksz, n - to_pos); - src_maddr = paddr_to_maddr(src_paddr + chunk_skip); - if ( src_maddr == 0 ) - return n - to_pos; - - xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, dest); - memcpy((void *)dest, maddr_to_virt(src_maddr), bytes); + if ( xencomm_copy_chunk_from((unsigned long)to + to_pos, + src_paddr + chunk_skip, bytes) ) + goto out; from_pos += bytes; to_pos += bytes; } @@ -143,7 +269,35 @@ xencomm_copy_from_guest( i++; } +out: + xencomm_ctxt_done(&ctxt); return n - to_pos; +} + +static int +xencomm_copy_chunk_to( + unsigned long paddr, unsigned long from, unsigned int len) +{ + struct page_info *page; + + while (1) + { + int res; + res = xencomm_get_page(paddr, &page); + if ( res != 0 ) + { + if ( res == -EAGAIN ) + continue; /* Try again. */ + return res; + } + xc_dprintk("%lx[%d] -> %lx\n", from, len, + (unsigned long)xencomm_vaddr(paddr, page)); + + memcpy(xencomm_vaddr(paddr, page), (void *)from, len); + put_page(page); + return 0; + } + /* NOTREACHED */ } static unsigned long @@ -155,14 +309,12 @@ xencomm_inline_to_guest( while ( n > 0 ) { unsigned int chunksz, bytes; - unsigned long dest_maddr; chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE); bytes = min(chunksz, n); - dest_maddr = paddr_to_maddr(dest_paddr); - xc_dprintk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr); - memcpy(maddr_to_virt(dest_maddr), (void *)from, bytes); + if ( xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes) ) + return n; dest_paddr += bytes; from += bytes; n -= bytes; @@ -188,7 +340,7 @@ xencomm_copy_to_guest( xencomm_copy_to_guest( void *to, const void *from, unsigned int n, unsigned int skip) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; unsigned int from_pos = 0; unsigned int to_pos = 0; unsigned int i = 0; @@ -196,22 +348,13 @@ xencomm_copy_to_guest( if ( xencomm_is_inline(to) ) return xencomm_inline_to_guest(to, from, n, skip); - /* First we need to access the descriptor. */ - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)to)); - if ( desc == NULL ) + if ( xencomm_ctxt_init(to, &ctxt) ) return n; - if ( desc->magic != XENCOMM_MAGIC ) - { - printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); - return n; - } - - /* Iterate through the descriptor, copying up to a page at a time. */ - while ( (from_pos < n) && (i < desc->nr_addrs) ) - { - unsigned long dest_paddr = desc->address[i]; + /* Iterate through the descriptor, copying up to a page at a time */ + while ( (from_pos < n) && (i < xencomm_ctxt_nr_addrs(&ctxt)) ) + { + unsigned long dest_paddr = *xencomm_ctxt_address(&ctxt, i); unsigned int pgoffset, chunksz, chunk_skip; if ( dest_paddr == XENCOMM_INVALID ) @@ -228,18 +371,13 @@ xencomm_copy_to_guest( chunksz -= chunk_skip; skip -= chunk_skip; - if ( (skip == 0) && (chunksz > 0) ) - { - unsigned long dest_maddr; - unsigned long source = (unsigned long)from + from_pos; + if ( skip == 0 && chunksz > 0 ) + { unsigned int bytes = min(chunksz, n - from_pos); - dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip); - if ( dest_maddr == 0 ) - return n - from_pos; - - xc_dprintk("%lx[%d] -> %lx\n", source, bytes, dest_maddr); - memcpy(maddr_to_virt(dest_maddr), (void *)source, bytes); + if ( xencomm_copy_chunk_to(dest_paddr + chunk_skip, + (unsigned long)from + from_pos, bytes) ) + goto out; from_pos += bytes; to_pos += bytes; } @@ -247,6 +385,8 @@ xencomm_copy_to_guest( i++; } +out: + xencomm_ctxt_done(&ctxt); return n - from_pos; } @@ -260,29 +400,23 @@ static int xencomm_inline_add_offset(voi * exhausted pages to XENCOMM_INVALID. */ int xencomm_add_offset(void **handle, unsigned int bytes) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; int i = 0; if ( xencomm_is_inline(*handle) ) return xencomm_inline_add_offset(handle, bytes); - /* First we need to access the descriptor. */ - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)*handle)); - if ( desc == NULL ) + if ( xencomm_ctxt_init(handle, &ctxt) ) return -1; - if ( desc->magic != XENCOMM_MAGIC ) - { - printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); - return -1; - } - - /* Iterate through the descriptor incrementing addresses. */ - while ( (bytes > 0) && (i < desc->nr_addrs) ) - { - unsigned long dest_paddr = desc->address[i]; - unsigned int pgoffset, chunksz, chunk_skip; + /* Iterate through the descriptor incrementing addresses */ + while ( (bytes > 0) && (i < xencomm_ctxt_nr_addrs(&ctxt)) ) + { + unsigned long *address = xencomm_ctxt_address(&ctxt, i); + unsigned long dest_paddr = *address; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; if ( dest_paddr == XENCOMM_INVALID ) { @@ -295,33 +429,39 @@ int xencomm_add_offset(void **handle, un chunk_skip = min(chunksz, bytes); if ( chunk_skip == chunksz ) - desc->address[i] = XENCOMM_INVALID; /* exchausted this page */ + *address = XENCOMM_INVALID; /* exhausted this page */ else - desc->address[i] += chunk_skip; + *address += chunk_skip; bytes -= chunk_skip; i++; } - + xencomm_ctxt_done(&ctxt); return 0; } int xencomm_handle_is_null(void *handle) { - struct xencomm_desc *desc; + struct xencomm_ctxt ctxt; int i; + int res = 1; if ( xencomm_is_inline(handle) ) return xencomm_inline_addr(handle) == 0; - desc = (struct xencomm_desc *) - xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)handle)); - if ( desc == NULL ) + if ( xencomm_ctxt_init(handle, &ctxt) ) return 1; - for ( i = 0; i < desc->nr_addrs; i++ ) - if ( desc->address[i] != XENCOMM_INVALID ) - return 0; - - return 1; -} + for ( i = 0; i < xencomm_ctxt_nr_addrs(&ctxt); i++ ) + { + if ( *xencomm_ctxt_address(&ctxt, i) != XENCOMM_INVALID ) + { + res = 0; + goto out; + } + } + +out: + xencomm_ctxt_done(&ctxt); + return res; +} _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel