Hi, I'm trying to enable a VNIF driver module in an unmodified Redhat EL3 kernel. Now it works, but after some network communication, the guest kernel will crash when running some commands like ifconfig, tcpdump, ttcp. Different applications crash at different places, but for each application the place is the same every time. For example, with ifconfig, it is in sock_create(): if ((i = net_families[family]->create(sock, protocol)) < 0) where net_families[family] is an illegal pointer; net_families is 0xc03180a0. One problem is that I use kernel 2.6.10 as the xen0 and Redhat EL3 is a 2.4.21 kernel; in the backend net driver skb->head is page aligned, but in netfront.c it is not, e.g. 0xcxxxx800. So I did a workaround as in the following patch; can this cause a problem? @@ -353,18 +359,20 @@ * ourself and for other kernel subsystems. */ batch_target = np->rx_target - (req_prod - np->rx_resp_cons); - if (unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) =NULL)) + if ( unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM + 1500)) == NULL) ) break; __skb_queue_tail(&np->rx_batch, skb); } Another main piece of code I wrote is the do_update_va_mapping hypercall. I wrote a new implementation for unmodified guest domains; see the patch. Can anyone see the problem in it? 
(some commented code are what I''ve ever tried but still same result) Index: arch/x86/mm.c ==================================================================--- arch/x86/mm.c (revision 512) +++ arch/x86/mm.c (working copy) @@ -1707,7 +1757,14 @@ break; } - if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) ) + if(VMX_DOMAIN(current)){ + vbdprintk("copy form guest\n"); + rc = copy_from_guest(&req, ureqs, sizeof(req)); + } + else { + rc = __copy_from_user(&req, ureqs, sizeof(req)); + } + if ( unlikely((rc) != 0) ) { MEM_LOG("Bad __copy_from_user"); rc = -EFAULT; @@ -1716,7 +1773,7 @@ cmd = req.ptr & (sizeof(l1_pgentry_t)-1); pfn = req.ptr >> PAGE_SHIFT; - + vbdprintk("req.ptr, cmd, pfn:%x, %x, %x\n", req.ptr, cmd, pfn); okay = 0; switch ( cmd ) @@ -1911,7 +1968,92 @@ return rc; } +void shadow_map_l1_into_current_l2(unsigned long va); +int do_update_vmx_va_mapping(unsigned long va, + unsigned long val, + unsigned long flags) +{ + struct exec_domain *ed = current; + struct domain *d = ed->domain; + int err = 0; + unsigned int cpu = ed->processor; + unsigned long deferred_ops; + unsigned long gpa; + unsigned long sval = 0; + vnifprintk("do_update_va_mapping, va:%p, val:%p, flags:%d\n", va, val, flags); + vnifprintk("shadow_mode:%x\n", d->arch.shadow_mode); + + LOCK_BIGLOCK(d); + +// cleanup_writable_pagetable(d); + + /* + * XXX When we make this support 4MB superpages we should also deal with + * the case of updating L2 entries. + */ + +#if 0 + gval = val ? 
((va - KERNEL_PAGE_OFFSET) & PAGE_MASK) | + (val & ~PAGE_MASK) : 0; +#endif + + gpa = gva_to_gpa(va); + if(gpa) { + if(val) + set_phystomachine(gpa >> PAGE_SHIFT, + val >> PAGE_SHIFT); + else + set_phystomachine(gpa >> PAGE_SHIFT, ~0UL); + } + +#if 0 + if ( unlikely(!mod_vmx_l1_entry(va, mk_l1_pgentry(gval))) ) { + printk("mod l1 error\n"); + err = -EINVAL; + } +#endif + + sval = val; + + if((va >> 16) == 0xc031) + printk("c031 va:%p\n", va); + + vnifprintk("shadow_mode_enabled:%p, %p\n", val, sval); + + if ( unlikely(__put_user(sval, ((unsigned long *)( + &shadow_linear_pg_table[l1_linear_offset(va)])))) ) + { + printk("put_user shadow error:%p %p\n", va, sval,val); + printk("spgd:%p\n", ed->arch.shadow_vtable[va>>L2_PAGETABLE_SHIFT]); + + shadow_map_l1_into_current_l2(va); + if(__put_user(sval, ((unsigned long *)( + &shadow_linear_pg_table[l1_linear_offset(va)])))) + printk("still put_user shadow error:%p %p\n", va, val); + + check_pagetable(d, ed->arch.guest_table, "va"); /* debug */ + } + + deferred_ops = percpu_info[cpu].deferred_ops; + percpu_info[cpu].deferred_ops = 0; + + if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || + unlikely(flags & UVMF_FLUSH_TLB) ) + local_flush_tlb(); + else if ( unlikely(flags & UVMF_INVLPG) ) + __flush_tlb_one(va); + + if ( unlikely(deferred_ops & DOP_RELOAD_LDT) ) + (void)map_ldt_shadow_page(0); + + UNLOCK_BIGLOCK(d); + + vnifprintk("exit va mapping\n"); + return err; + +} + int do_update_va_mapping(unsigned long va, unsigned long val, unsigned long flags) @@ -1924,6 +2066,11 @@ perfc_incrc(calls_to_update_va); + if(unlikely(VMX_DOMAIN(ed))) { + return do_update_vmx_va_mapping(va, val, flags); + } + + if ( unlikely(!__addr_ok(va)) ) return -EINVAL; @@ -1949,6 +2096,7 @@ l1pte_propagate_from_guest(d, &val, &sval); + vnifprintk("shadow_mode_enabled:%p, %p\n", val, sval); if ( unlikely(__put_user(sval, ((unsigned long *)( &shadow_linear_pg_table[l1_linear_offset(va)])))) ) { @@ -2975,3 +3123,5 @@ } #endif + + Index: 
include/asm-x86/mm.h ==================================================================--- include/asm-x86/mm.h (revision 512) +++ include/asm-x86/mm.h (working copy) @@ -150,6 +150,9 @@ free_domheap_page(page); } +#ifndef vnifprintk +#define vnifprintk(_a...) +#endif static inline int get_page(struct pfn_info *page, struct domain *domain) @@ -252,6 +255,21 @@ mfn = l1_pgentry_to_phys(pte) >> PAGE_SHIFT; return mfn; } + +static inline unsigned long set_phystomachine(unsigned long pfn, unsigned long ma) +{ + l1_pgentry_t pte; + if (__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn))) { + return 0; + } + l1_pgentry_val(pte) = (__pa(ma) & PAGE_MASK) | (l1_pgentry_val(pte) & ~PAGE_MASK); + l1_pgentry_val(pte) = ma << PAGE_SHIFT; + if(__put_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn))) { + return 0; + } + return ma; +} + #define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn) #define DEFAULT_GDT_ENTRIES (LAST_RESERVED_GDT_ENTRY+1) ------------------- Ling Xiaofeng(Daniel) Intel China Software Center. xfling@users.sourceforge.net Opinions are my own and don’t represent those of my employer _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> Another main code I did is the do_update_va_mapping hypercall. > I wrote a new implement for unmodified guest domain. > see the patch. anyone can see the problem in it? > (some commented code are what I've ever tried but still same result) Hmm, I think you're confused: if you're in a VT-x guest then you don't need to use update_va_mapping as you can update your own pagetables directly and shadow mode will take care of propagating the update. Ian _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Pratt <mailto:m+Ian.Pratt@cl.cam.ac.uk> wrote: >> Another main code I did is the do_update_va_mapping hypercall. >> I wrote a new implement for unmodified guest domain. >> see the patch. anyone can see the problem in it? >> (some commented code are what I've ever tried but still same result) > > Hmm, I think you're confused: if you're in a VT-x guest then you don't > need to use update_va_mapping as you can update your own pagetables > directly and shadow mode will take care of propagating the update. > > Ian Yes, but does it still need the hypervisor to modify the phys_to_machine table? I just hope to save some page faults or TLB-flush vmexits to get high performance and also have less modification to the original netfront.c. So still only one multicall is needed each time it gets a net event. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> Yes, but does it still need hypervisor to modify the > phys_to_machine table? > I just hope to save some page faults or tlb flush vmexit to > get high performance and also have less modification to the > original netfront.c. So still only one multicall needed each > time when gets a net event. The netfront/back driver needs to be switched over to using the grant tables interface. Chris Clark has already checked in the grant tables implementation, but no-one has got around to updating netfront/back to use it. There are patches to switch blkfront/back over to use grant tables, which will be checked in soon, but since the blk driver uses foreign access rather than the page transfer mechanism, it's probably not too much help. Using grant tables, the front end doesn't need to know about machine addresses, and the whole thing ends up rather cleaner, particularly for domains running with virtualized VMs. I've attached a document from Chris that gives a rough description of the grant table interface. Best, Ian _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> This looks quite interesting. Is there anything written up about the > motivations for this design, and how it compares to other similar > mechanisms, say, L4's grant-via-IPC system? I don't know of much writeup beyond the details in the README and the original description in the 2004 OASIS paper (http://www.cl.cam.ac.uk/netos/papers/2004-oasis-ngio.pdf). > Are the grant references capabilities, or how do you prevent domains > from inventing their own? I think they behave like capabilities. Given a grant ref. ID, the mapping domain asks Xen for a mapping. Xen uses the ID to index into the mappee's grant table and checks that the reference gives the appropriate permissions. > Who takes care of garbage-collecting them when > a domain exists or dies? I guess a domain's memory won't be deallocated until all mappings to it are relinquished (due to ref counting). At this point it will be safe to throw away all the grant references. > Can a domain DoS a Xen-system by allocating all > the grant refs in the system? Each domain has its own grant table, so this shouldn't be a problem. Cheers, Mark _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Pratt <mailto:m+Ian.Pratt@cl.cam.ac.uk> wrote: >> Yes, but does it still need hypervisor to modify the phys_to_machine >> table? I just hope to save some page faults or tlb flush vmexit to >> get high performance and also have less modification to the >> original netfront.c. So still only one multicall needed each >> time when gets a net event. > > The netfront/back driver needs to be switched over to using the > grant tables interface. > > Chris Clark has already checked in the grant tables implementation, > but no-one has got around to updating netfront/back to use it. There > are patches to switch blkfront/back over to use grant tables, which > will be checked in soon, but since the blk driver use the foreign > access rather than the page transfer mechanism its probably not too > much help. > > Using grant tables, the front end doesn't need to know about machine > addresses, and the whole thing ends up rather cleaner, particularly for > domains running with virtualized VMs. Yes, there is a security problem with using machine addresses in netfront. I was originally thinking that we could pass virtual addresses to the backend through the ring buffer and let the backend set up the mapping for the frontend. I've already done a blkfront module driver in unmodified Linux with an event channel and ctrlif module driver. I'll wait for the new blkfront with grant tables. For netfront, if no one has worked on the grant table update, I can do it together. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> > Using grant tables, the front end doesn't need to know > about machine > > addresses, and the whole thing ends up rather cleaner, > particularly for > > domains running with virtualized VMs. > Yes, there do have security problem to use machine address in > netfront. It's not actually a security problem, but using mfns is a bit ugly. > I'm originally thinking that we can pass virtual address to > backend through the ring buffer and let the backend to set > the mapping for the frontend. > I've already done a blkfront module driver in unmodified > linux with an event channel and ctrlif module > driver. Great. > I'll wait for the new blkfront with grant table. For > netfront, if no one has worked on the grant table update, I > can do it together. I've attached the patch that updates the grant tables code and switches the blk dev over to use it. It would be great if you could work up something similar for the netfront/back. Thanks, Ian _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Pratt wrote: > ******************************************************************************** > A Rough Introduction to Using Grant Tables > ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > Christopher Clark, March, 2005. > This looks quite interesting. Is there anything written up about the motivations for this design, and how it compares to other similar mechanisms, say, L4's grant-via-IPC system? Are the grant references capabilities, or how do you prevent domains from inventing their own? Who takes care of garbage-collecting them when a domain exists or dies? Can a domain DoS a Xen-system by allocating all the grant refs in the system? best, Jacob _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> Are the grant references capabilities, or how do you prevent > domains from inventing their own? Domains create and maintain their own grant tables. They don't have to be capabilities to be secure. > Who takes care of garbage-collecting them when a domain exists or dies? Since Xen tracks active grant references, revocation is possible, but is a slow-path operation. > Can a domain DoS a Xen-system by allocating all the grant refs in > the system? No... Ian _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Ian Pratt <mailto:m+Ian.Pratt@cl.cam.ac.uk> wrote: >>> Using grant tables, the front end doesn't need to know about machine >>> addresses, and the whole thing ends up rather cleaner, particularly >>> for domains running with virtualized VMs. >> Yes, there do have security problem to use machine address in >> netfront. > > It's not actually a security problem, but using mfns is a bit ugly. > I mean for a full-virtualization domain, if the guest can map any mfn to its pfn, it will not be secure. I had a quick look at the grant table. Is the main point that a domain puts the mfn into the table and gets an id, and then gives the other domain that id, so the other domain is allowed to map that mfn? _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
> > It's not actually a security problem, but using mfns is a bit ugly. > > > I mean for a full-virtualization domain, if the guest can map > any mfn to its pfn, it will not be secure. It can't unless the fully virtualized domain is fully privileged, which it shouldn't be. > I have a quick look at the grant table, Is the main point > that put the mfn to the table and get an id, and then give > other domain an id, so the other domain is allowed to map that mfn? Yes, that's how it works. Thanks, Ian _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel