Jan Beulich
2011-May-03 14:08 UTC
[Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
It would seem possible to fold the two trees into one (making e.g. the
emuirq bits stored in the upper half of the pointer), but I'm not
certain that's worth it as it would make deletion of entries more
cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...

v2: Split setup/teardown into two stages - (de-)allocation (tree node
(de-)population) is done with just d->event_lock held (and hence
interrupts enabled), while actual insertion/removal of translation data
gets done with irq_desc's lock held (and interrupts disabled).

Signed-off-by: Jan Beulich <jbeulich@novell.com>

--- 2011-04-29.orig/xen/arch/x86/domain.c
+++ 2011-04-29/xen/arch/x86/domain.c
@@ -614,26 +614,16 @@ int arch_domain_create(struct domain *d,
         memset(d->arch.pirq_irq, 0,
                d->nr_pirqs * sizeof(*d->arch.pirq_irq));

-        d->arch.irq_pirq = xmalloc_array(int, nr_irqs);
-        if ( !d->arch.irq_pirq )
+        if ( (rc = init_domain_irq_mapping(d)) != 0 )
             goto fail;
-        memset(d->arch.irq_pirq, 0,
-               nr_irqs * sizeof(*d->arch.irq_pirq));
-
-        for ( i = 1; platform_legacy_irq(i); ++i )
-            if ( !IO_APIC_IRQ(i) )
-                d->arch.irq_pirq[i] = d->arch.pirq_irq[i] = i;

         if ( is_hvm_domain(d) )
         {
             d->arch.pirq_emuirq = xmalloc_array(int, d->nr_pirqs);
-            d->arch.emuirq_pirq = xmalloc_array(int, nr_irqs);
-            if ( !d->arch.pirq_emuirq || !d->arch.emuirq_pirq )
+            if ( !d->arch.pirq_emuirq )
                 goto fail;
             for (i = 0; i < d->nr_pirqs; i++)
                 d->arch.pirq_emuirq[i] = IRQ_UNBOUND;
-            for (i = 0; i < nr_irqs; i++)
-                d->arch.emuirq_pirq[i] = IRQ_UNBOUND;
         }


@@ -671,9 +661,8 @@ int arch_domain_create(struct domain *d,
     d->is_dying = DOMDYING_dead;
     vmce_destroy_msr(d);
     xfree(d->arch.pirq_irq);
-    xfree(d->arch.irq_pirq);
     xfree(d->arch.pirq_emuirq);
-    xfree(d->arch.emuirq_pirq);
+    cleanup_domain_irq_mapping(d);
     free_xenheap_page(d->shared_info);
     if ( paging_initialised )
         paging_final_teardown(d);
@@ -726,9 +715,8 @@ void arch_domain_destroy(struct domain *

     free_xenheap_page(d->shared_info);
     xfree(d->arch.pirq_irq);
-    xfree(d->arch.irq_pirq);
     xfree(d->arch.pirq_emuirq);
-    xfree(d->arch.emuirq_pirq);
+    cleanup_domain_irq_mapping(d);
 }

 unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long guest_cr4)
--- 2011-04-29.orig/xen/arch/x86/irq.c
+++ 2011-04-29/xen/arch/x86/irq.c
@@ -950,6 +950,64 @@ struct irq_desc *domain_spin_lock_irq_de
     return desc;
 }

+static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq)
+{
+    int err = radix_tree_insert(&d->arch.irq_pirq, irq, NULL,
+                                NULL, NULL);
+
+    return err != -EEXIST ? err : 0;
+}
+
+static void set_domain_irq_pirq(struct domain *d, int irq, int pirq)
+{
+    *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)pirq;
+    d->arch.pirq_irq[pirq] = irq;
+}
+
+static void clear_domain_irq_pirq(struct domain *d, int irq, int pirq)
+{
+    d->arch.pirq_irq[pirq] = 0;
+    *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = NULL;
+}
+
+static void cleanup_domain_irq_pirq(struct domain *d, int irq, int pirq)
+{
+    radix_tree_delete(&d->arch.irq_pirq, irq, NULL);
+}
+
+int init_domain_irq_mapping(struct domain *d)
+{
+    unsigned int i;
+    int err = 0;
+
+    INIT_RADIX_TREE(&d->arch.irq_pirq, 0);
+    if ( is_hvm_domain(d) )
+        INIT_RADIX_TREE(&d->arch.hvm_domain.emuirq_pirq, 0);
+
+    for ( i = 1; platform_legacy_irq(i); ++i )
+        if ( !IO_APIC_IRQ(i) )
+        {
+            err = prepare_domain_irq_pirq(d, i, i);
+            if ( err )
+                break;
+            set_domain_irq_pirq(d, i, i);
+        }
+
+    return err;
+}
+
+static void irq_slot_free(void *unused)
+{
+}
+
+void cleanup_domain_irq_mapping(struct domain *d)
+{
+    radix_tree_destroy(&d->arch.irq_pirq, irq_slot_free, NULL);
+    if ( is_hvm_domain(d) )
+        radix_tree_destroy(&d->arch.hvm_domain.emuirq_pirq,
+                           irq_slot_free, NULL);
+}
+
 /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
 static void flush_ready_eoi(void)
 {
@@ -1373,7 +1431,7 @@ void pirq_guest_unbind(struct domain *d,
 {
     irq_guest_action_t *oldaction = NULL;
     struct irq_desc *desc;
-    int irq;
+    int irq = 0;

     WARN_ON(!spin_is_locked(&d->event_lock));

@@ -1386,7 +1444,7 @@ void pirq_guest_unbind(struct domain *d,
         BUG_ON(irq <= 0);
         desc = irq_to_desc(irq);
         spin_lock_irq(&desc->lock);
-        d->arch.pirq_irq[pirq] = d->arch.irq_pirq[irq] = 0;
+        clear_domain_irq_pirq(d, irq, pirq);
     }
     else
     {
@@ -1400,6 +1458,8 @@ void pirq_guest_unbind(struct domain *d,
         kill_timer(&oldaction->eoi_timer);
         xfree(oldaction);
     }
+    else if ( irq > 0 )
+        cleanup_domain_irq_pirq(d, irq, pirq);
 }

 static int pirq_guest_force_unbind(struct domain *d, int irq)
@@ -1523,6 +1583,10 @@ int map_domain_pirq(
         return ret;
     }

+    ret = prepare_domain_irq_pirq(d, irq, pirq);
+    if ( ret )
+        return ret;
+
     desc = irq_to_desc(irq);

     if ( type == MAP_PIRQ_TYPE_MSI )
@@ -1544,19 +1608,20 @@ int map_domain_pirq(
             dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
                     d->domain_id, irq);
         desc->handler = &pci_msi_type;
-        d->arch.pirq_irq[pirq] = irq;
-        d->arch.irq_pirq[irq] = pirq;
+        set_domain_irq_pirq(d, irq, pirq);
         setup_msi_irq(pdev, msi_desc, irq);
         spin_unlock_irqrestore(&desc->lock, flags);
-    } else
+    }
+    else
     {
         spin_lock_irqsave(&desc->lock, flags);
-        d->arch.pirq_irq[pirq] = irq;
-        d->arch.irq_pirq[irq] = pirq;
+        set_domain_irq_pirq(d, irq, pirq);
         spin_unlock_irqrestore(&desc->lock, flags);
     }

 done:
+    if ( ret )
+        cleanup_domain_irq_pirq(d, irq, pirq);
     return ret;
 }

@@ -1599,20 +1664,20 @@ int unmap_domain_pirq(struct domain *d,
     BUG_ON(irq != domain_pirq_to_irq(d, pirq));

     if ( !forced_unbind )
-    {
-        d->arch.pirq_irq[pirq] = 0;
-        d->arch.irq_pirq[irq] = 0;
-    }
+        clear_domain_irq_pirq(d, irq, pirq);
     else
     {
         d->arch.pirq_irq[pirq] = -irq;
-        d->arch.irq_pirq[irq] = -pirq;
+        *radix_tree_lookup_slot(&d->arch.irq_pirq, irq) = (void *)(long)-pirq;
     }

     spin_unlock_irqrestore(&desc->lock, flags);
     if (msi_desc)
         msi_free_irq(msi_desc);

+    if ( !forced_unbind )
+        cleanup_domain_irq_pirq(d, irq, pirq);
+
     ret = irq_deny_access(d, pirq);
     if ( ret )
         dprintk(XENLOG_G_ERR, "dom%d: could not deny access to irq %d\n",
@@ -1829,10 +1894,25 @@ int map_domain_emuirq_pirq(struct domain
         return 0;
     }

-    d->arch.pirq_emuirq[pirq] = emuirq;
     /* do not store emuirq mappings for pt devices */
     if ( emuirq != IRQ_PT )
-        d->arch.emuirq_pirq[emuirq] = pirq;
+    {
+        int err = radix_tree_insert(&d->arch.hvm_domain.emuirq_pirq, emuirq,
+                                    (void *)((long)pirq + 1), NULL, NULL);
+
+        switch ( err )
+        {
+        case 0:
+            break;
+        case -EEXIST:
+            *radix_tree_lookup_slot(&d->arch.hvm_domain.emuirq_pirq, emuirq) =
+                (void *)((long)pirq + 1);
+            break;
+        default:
+            return err;
+        }
+    }
+    d->arch.pirq_emuirq[pirq] = emuirq;

     return 0;
 }
@@ -1860,7 +1940,7 @@ int unmap_domain_pirq_emuirq(struct doma

     d->arch.pirq_emuirq[pirq] = IRQ_UNBOUND;
     if ( emuirq != IRQ_PT )
-        d->arch.emuirq_pirq[emuirq] = IRQ_UNBOUND;
+        radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq, NULL);

 done:
     return ret;
--- 2011-04-29.orig/xen/common/radix-tree.c
+++ 2011-04-29/xen/common/radix-tree.c
@@ -26,7 +26,6 @@
  *  o tagging code removed
  *  o radix_tree_insert has func parameter for dynamic data struct allocation
  *  o radix_tree_destroy added (including recursive helper function)
- *  o __init functions must be called explicitly
  *  o other include files adapted to Xen
 */

@@ -35,6 +34,7 @@
 #include <xen/lib.h>
 #include <xen/types.h>
 #include <xen/errno.h>
+#include <xen/xmalloc.h>
 #include <xen/radix-tree.h>
 #include <asm/cache.h>

@@ -49,6 +49,18 @@ static inline unsigned long radix_tree_m
        return height_to_maxindex[height];
 }

+static struct radix_tree_node *_node_alloc(void *unused)
+{
+       struct radix_tree_node *node = xmalloc(struct radix_tree_node);
+
+       return node ? memset(node, 0, sizeof(*node)) : node;
+}
+
+static void _node_free(struct radix_tree_node *node)
+{
+       xfree(node);
+}
+
 /*
  *     Extend a radix tree so it can store key @index.
  */
@@ -100,6 +112,9 @@ int radix_tree_insert(struct radix_tree_
        int offset;
        int error;

+       if (!node_alloc)
+               node_alloc = _node_alloc;
+
        /* Make sure the tree is high enough. */
        if (index > radix_tree_maxindex(root->height)) {
                error = radix_tree_extend(root, index, node_alloc, arg);
@@ -336,6 +351,9 @@ void *radix_tree_delete(struct radix_tre
        unsigned int height, shift;
        int offset;

+       if (!node_free)
+               node_free = _node_free;
+
        height = root->height;
        if (index > radix_tree_maxindex(height))
                goto out;
@@ -420,6 +438,8 @@ void radix_tree_destroy(struct radix_tre
        if (root->height == 0)
                slot_free(root->rnode);
        else {
+               if (!node_free)
+                       node_free = _node_free;
                radix_tree_node_destroy(root->rnode, root->height,
                                        slot_free, node_free);
                node_free(root->rnode);
@@ -440,10 +460,14 @@ static unsigned long __init __maxindex(u
        return index;
 }

-void __init radix_tree_init(void)
+static int __init radix_tree_init(void)
 {
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
                height_to_maxindex[i] = __maxindex(i);
+
+       return 0;
 }
+/* pre-SMP just so it runs before 'normal' initcalls */
+presmp_initcall(radix_tree_init);
--- 2011-04-29.orig/xen/common/tmem.c
+++ 2011-04-29/xen/common/tmem.c
@@ -2925,7 +2925,6 @@ static int __init init_tmem(void)
     if ( !tmh_enabled() )
         return 0;

-    radix_tree_init();
     if ( tmh_dedup_enabled() )
         for (i = 0; i < 256; i++ )
         {
--- 2011-04-29.orig/xen/include/asm-x86/domain.h
+++ 2011-04-29/xen/include/asm-x86/domain.h
@@ -3,6 +3,7 @@

 #include <xen/config.h>
 #include <xen/mm.h>
+#include <xen/radix-tree.h>
 #include <asm/hvm/vcpu.h>
 #include <asm/hvm/domain.h>
 #include <asm/e820.h>
@@ -284,10 +285,9 @@ struct arch_domain
     const char *nested_p2m_function;

     /* NB. protected by d->event_lock and by irq_desc[irq].lock */
-    int *irq_pirq;
+    struct radix_tree_root irq_pirq;
     int *pirq_irq;
-    /* pirq to emulated irq and vice versa */
-    int *emuirq_pirq;
+    /* pirq to emulated irq */
     int *pirq_emuirq;

     /* Maximum physical-address bitwidth supported by this guest. */
--- 2011-04-29.orig/xen/include/asm-x86/hvm/domain.h
+++ 2011-04-29/xen/include/asm-x86/hvm/domain.h
@@ -59,6 +59,9 @@ struct hvm_domain {
     /* VCPU which is current target for 8259 interrupts. */
     struct vcpu *i8259_target;

+    /* emulated irq to pirq */
+    struct radix_tree_root emuirq_pirq;
+
     /* hvm_print_line() logging. */
 #define HVM_PBUF_SIZE 80
     char *pbuf;
--- 2011-04-29.orig/xen/include/asm-x86/irq.h
+++ 2011-04-29/xen/include/asm-x86/irq.h
@@ -143,11 +143,17 @@ int bind_irq_vector(int irq, int vector,

 void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);

+int init_domain_irq_mapping(struct domain *);
+void cleanup_domain_irq_mapping(struct domain *);
+
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
-#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
+#define domain_irq_to_pirq(d, irq) \
+    ((long)radix_tree_lookup(&(d)->arch.irq_pirq, irq))
 #define PIRQ_ALLOCATED -1
 #define domain_pirq_to_emuirq(d, pirq) ((d)->arch.pirq_emuirq[pirq])
-#define domain_emuirq_to_pirq(d, emuirq) ((d)->arch.emuirq_pirq[emuirq])
+#define domain_emuirq_to_pirq(d, emuirq) \
+    (((long)radix_tree_lookup(&(d)->arch.hvm_domain.emuirq_pirq, emuirq) ?: \
+      IRQ_UNBOUND + 1) - 1)
 #define IRQ_UNBOUND -1
 #define IRQ_PT -2

--- 2011-04-29.orig/xen/include/xen/radix-tree.h
+++ 2011-04-29/xen/include/xen/radix-tree.h
@@ -73,6 +73,5 @@ void *radix_tree_delete(struct radix_tre
 unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                        unsigned long first_index, unsigned int max_items);
-void radix_tree_init(void);

 #endif /* _XEN_RADIX_TREE_H */
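The two-stage split described in the v2 note reduces to the call pattern
below. This is a condensed, illustrative sketch distilled from the patch's
map_domain_pirq() path; the function name sketch_map_pirq is made up for the
illustration, and the MSI branch and error reporting are omitted, so it is
not hypervisor code as-is:

static int sketch_map_pirq(struct domain *d, int pirq, int irq)
{
    struct irq_desc *desc;
    unsigned long flags;
    int ret;

    /* Stage 1: only d->event_lock is held (IRQs still enabled), so the
     * radix tree node backing this irq can be allocated/populated here. */
    ASSERT(spin_is_locked(&d->event_lock));
    ret = prepare_domain_irq_pirq(d, irq, pirq);
    if ( ret )
        return ret;

    /* Stage 2: under irq_desc's lock (IRQs disabled) only the translation
     * value is stored into the already-existing slot - no (de-)population
     * of tree nodes, hence no allocation with interrupts off. */
    desc = irq_to_desc(irq);
    spin_lock_irqsave(&desc->lock, flags);
    set_domain_irq_pirq(d, irq, pirq);
    spin_unlock_irqrestore(&desc->lock, flags);

    return 0;
}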
Keir Fraser
2011-May-03 21:08 UTC
Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
On 03/05/2011 15:08, "Jan Beulich" <JBeulich@novell.com> wrote:

> It would seem possible to fold the two trees into one (making e.g. the
> emuirq bits stored in the upper half of the pointer), but I'm not
> certain that's worth it as it would make deletion of entries more
> cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...
>
> v2: Split setup/teardown into two stages - (de-)allocation (tree node
> (de-)population) is done with just d->event_lock held (and hence
> interrupts enabled), while actual insertion/removal of translation data
> gets done with irq_desc's lock held (and interrupts disabled).

This is mostly okay, because the only operations that occur with irqs
disabled are read-only on the radix-tree structure itself, hence no
alloc/dealloc will happen. *However* those calls to
radix_tree_lookup[_slot]() are not synchronised wrt your calls to
radix_tree_{insert,delete}(). The latter hold d->event_lock, while the
former do not. Hence you need RCU and you need a new first patch in your
patch set to pull in a modern radix-tree.[ch] from upstream Linux.

 -- keir
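To make the synchronisation point concrete, the upstream-Linux-style pattern
Keir refers to looks roughly like the sketch below. It uses the Linux
radix-tree/RCU API names (radix_tree_lookup(), radix_tree_insert(),
rcu_read_lock()); the Xen radix_tree_insert()/radix_tree_delete() in the
patch above take extra allocation/free callbacks, and the helper names
lookup_pirq/install_pirq are made up here, so this illustrates the locking
model rather than drop-in code:

/* Reader side, e.g. a lookup done under irq_desc->lock with IRQs off and
 * without d->event_lock: the tree walk itself is protected only by RCU. */
static long lookup_pirq(struct radix_tree_root *tree, unsigned long irq)
{
    void *slot;

    rcu_read_lock();
    slot = radix_tree_lookup(tree, irq);
    rcu_read_unlock();

    return (long)slot;
}

/* Updater side: insertions/deletions stay serialised by d->event_lock,
 * and internal tree nodes freed by an update are only reclaimed after a
 * grace period (the upstream implementation RCU-frees its nodes). */
static int install_pirq(struct domain *d, struct radix_tree_root *tree,
                        unsigned long irq, long pirq)
{
    int err;

    spin_lock(&d->event_lock);
    err = radix_tree_insert(tree, irq, (void *)pirq);
    spin_unlock(&d->event_lock);

    return err;
}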
Jan Beulich
2011-May-04 07:27 UTC
Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
>>> On 03.05.11 at 23:08, Keir Fraser <keir@xen.org> wrote:
> On 03/05/2011 15:08, "Jan Beulich" <JBeulich@novell.com> wrote:
>
>> It would seem possible to fold the two trees into one (making e.g. the
>> emuirq bits stored in the upper half of the pointer), but I'm not
>> certain that's worth it as it would make deletion of entries more
>> cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...
>>
>> v2: Split setup/teardown into two stages - (de-)allocation (tree node
>> (de-)population) is done with just d->event_lock held (and hence
>> interrupts enabled), while actual insertion/removal of translation data
>> gets done with irq_desc's lock held (and interrupts disabled).
>
> This is mostly okay, because the only operations that occur with irqs
> disabled are read-only on the radix-tree structure itself, hence no
> alloc/dealloc will happen. *However* those calls to
> radix_tree_lookup[_slot]() are not synchronised wrt your calls to
> radix_tree_{insert,delete}(). The latter hold d->event_lock, while the
> former do not. Hence you need RCU and you need a new first patch in your
> patch set to pull in a modern radix-tree.[ch] from upstream Linux.

Right you are - I didn't pay attention to the tree internal nodes.
Will take a few days though before I can get to this.

Jan
Keir Fraser
2011-May-04 08:13 UTC
Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
On 04/05/2011 08:27, "Jan Beulich" <JBeulich@novell.com> wrote:

>>>> On 03.05.11 at 23:08, Keir Fraser <keir@xen.org> wrote:
>> On 03/05/2011 15:08, "Jan Beulich" <JBeulich@novell.com> wrote:
>>
>>> It would seem possible to fold the two trees into one (making e.g. the
>>> emuirq bits stored in the upper half of the pointer), but I'm not
>>> certain that's worth it as it would make deletion of entries more
>>> cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...
>>>
>>> v2: Split setup/teardown into two stages - (de-)allocation (tree node
>>> (de-)population) is done with just d->event_lock held (and hence
>>> interrupts enabled), while actual insertion/removal of translation data
>>> gets done with irq_desc's lock held (and interrupts disabled).
>>
>> This is mostly okay, because the only operations that occur with irqs
>> disabled are read-only on the radix-tree structure itself, hence no
>> alloc/dealloc will happen. *However* those calls to
>> radix_tree_lookup[_slot]() are not synchronised wrt your calls to
>> radix_tree_{insert,delete}(). The latter hold d->event_lock, while the
>> former do not. Hence you need RCU and you need a new first patch in your
>> patch set to pull in a modern radix-tree.[ch] from upstream Linux.
>
> Right you are - I didn't pay attention to the tree internal nodes.
> Will take a few days though before I can get to this.

Actually I'll take a look at it myself.

 -- Keir

> Jan
Dan Magenheimer
2011-May-04 16:13 UTC
RE: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
> From: Jan Beulich [mailto:JBeulich@novell.com]
> Subject: Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized
> per-domain arrays with radix trees
>
> >>> On 03.05.11 at 23:08, Keir Fraser <keir@xen.org> wrote:
> > On 03/05/2011 15:08, "Jan Beulich" <JBeulich@novell.com> wrote:
> >
> >> It would seem possible to fold the two trees into one (making e.g. the
> >> emuirq bits stored in the upper half of the pointer), but I'm not
> >> certain that's worth it as it would make deletion of entries more
> >> cumbersome. Unless pirq-s and emuirq-s were mutually exclusive...
> >>
> >> v2: Split setup/teardown into two stages - (de-)allocation (tree node
> >> (de-)population) is done with just d->event_lock held (and hence
> >> interrupts enabled), while actual insertion/removal of translation data
> >> gets done with irq_desc's lock held (and interrupts disabled).
> >
> > This is mostly okay, because the only operations that occur with irqs
> > disabled are read-only on the radix-tree structure itself, hence no
> > alloc/dealloc will happen. *However* those calls to
> > radix_tree_lookup[_slot]() are not synchronised wrt your calls to
> > radix_tree_{insert,delete}(). The latter hold d->event_lock, while the
> > former do not. Hence you need RCU and you need a new first patch in your
> > patch set to pull in a modern radix-tree.[ch] from upstream Linux.
>
> Right you are - I didn't pay attention to the tree internal nodes.
> Will take a few days though before I can get to this.

Jan, Keir --

If either of you are working on radix-tree.c in Xen, this lkml post
explains how the Xen version differs from the Linux upstream version
and (for tmem) why:

https://lkml.org/lkml/2010/12/7/294

After some offlist discussion, for kztmem (later renamed "the new
zcache"), I ended up extracting the essence of the radix-tree data
structure and adapting it inline specifically for my in-kernel tmem
needs. While I'm not a fan of duplicating code, this was an
expedient way to avoid a political quagmire.

Ideally, the Xen radix-tree.c would support both needs through some
kind of layering, but if your preference is to go to the upstream
Linux radix-tree.c, I can probably leverage my inlined-radix-tree
code into Xen tmem.c. I don't expect it will be trivial though.

Thanks,
Dan

P.S. In case you are curious about looking at the end result for tmem
in Linux, see https://lkml.org/lkml/2010/12/7/294 and search
(case-insensitive) for instances of "objnode"... I'm sure you'll see
the resemblance to radix-tree.c.
Keir Fraser
2011-May-04 19:55 UTC
Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
On 04/05/2011 17:13, "Dan Magenheimer" <dan.magenheimer@oracle.com> wrote:

> After some offlist discussion, for kztmem (later renamed "the new
> zcache"), I ended up extracting the essence of the radix-tree data
> structure and adapting it inline specifically for my in-kernel tmem
> needs. While I'm not a fan of duplicating code, this was an
> expedient way to avoid a political quagmire.
>
> Ideally, the Xen radix-tree.c would support both needs through some
> kind of layering, but if your preference is to go to the upstream
> Linux radix-tree.c, I can probably leverage my inlined-radix-tree
> code into Xen tmem.c. I don't expect it will be trivial though.

I'm sure I can pull in upstream radix-tree.c and get Xen's tmem ported onto
it. As for performance, I may leave the tagging stuff out as we won't need
it. Then it's just quibbling over RCU overheads, which I would bet are
manageable. We can modify radix-tree.c further if there is good reason for
it.

 -- Keir
Keir Fraser
2011-May-04 20:37 UTC
Re: [Xen-devel] [PATCH 1/2, v2] x86: replace nr_irqs sized per-domain arrays with radix trees
On 04/05/2011 20:55, "Keir Fraser" <keir.xen@gmail.com> wrote:

>> Ideally, the Xen radix-tree.c would support both needs through some
>> kind of layering, but if your preference is to go to the upstream
>> Linux radix-tree.c, I can probably leverage my inlined-radix-tree
>> code into Xen tmem.c. I don't expect it will be trivial though.
>
> I'm sure I can pull in upstream radix-tree.c and get Xen's tmem ported onto
> it. As for performance, I may leave the tagging stuff out as we won't need
> it. Then it's just quibbling over RCU overheads, which I would bet are
> manageable. We can modify radix-tree.c further if there is good reason for
> it.

Looking at 2.6.38's radix-tree implementation, I'll be stripping it down
heavily as I pull it in. It will likely be acceptable enough for tmem as is
after that, else after only minor further changes.

 -- Keir