... splitting it into a global nr_irqs (determined at boot time) and a
per-domain nr_pirqs (derived from nr_irqs and an optional command-line-
specified value, which should probably later become a per-domain config
setting).
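
(For illustration, the derivation as it ends up in the domain_create()
hunk below - calc_nr_pirqs() is a made-up wrapper, the real code does
this inline; extra_domU_irqs defaults to 8 and extra_dom0_irqs to 0,
the latter meaning "give dom0 another nr_irqs of headroom":

    static unsigned int calc_nr_pirqs(domid_t domid)
    {
        /* Every domain gets the platform's nr_irqs plus room for
         * MSIs etc.; dom0 gets nr_irqs doubled by default. */
        return nr_irqs + (domid ? extra_domU_irqs
                                : extra_dom0_irqs ?: nr_irqs);
    }

Both extras can be overridden on the hypervisor command line via
"extra_guest_irqs=<domU>[,<dom0>]", as parsed below.)
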
This has the (desirable, imo) side effect of reducing the size of struct
hvm_irq_dpci from requiring an order-3 page to an order-2 one (on x86-64),
which is nevertheless still too large.
However, there is now a variable-size bit array on the stack in
pt_irq_time_out() - while for the moment this is probably okay, it
certainly doesn't look nice. Replacing it with a static
(pre-)allocation also seems less than ideal, though, because that would
require at least min(d->nr_pirqs, NR_VECTORS) bit arrays of d->nr_pirqs
bits each, since the bit array is used outside of the serialized code
region in that function, and keeping the domain's event lock acquired
across pirq_guest_eoi() doesn't look like a good idea either.
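
(The pattern in question boils down to DECLARE_BITMAP() with a run-time
bound, i.e. a C99 variable-length array - a minimal stand-alone sketch,
with the macros spelled out and the function name made up:

    #include <limits.h>
    #include <string.h>

    #define BITS_TO_LONGS(n) (((n) + CHAR_BIT * sizeof(long) - 1) / \
                              (CHAR_BIT * sizeof(long)))
    #define DECLARE_BITMAP(name, bits) \
        unsigned long name[BITS_TO_LONGS(bits)]

    static void timeout_sketch(unsigned int nr_pirqs)
    {
        /* nr_pirqs is known only at run time, so this expands to a
         * variable-length array on the stack. */
        DECLARE_BITMAP(machine_gsi_map, nr_pirqs);

        memset(machine_gsi_map, 0,
               BITS_TO_LONGS(nr_pirqs) * sizeof(unsigned long));
        /* ... record GSIs under the event lock, EOI them after
         * dropping it ... */
    }
)
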
The IRQ- and vector-indexed arrays hanging off struct hvm_irq_dpci
could in fact be changed further to dynamically use the smaller of the
two ranges for indexing, since there are other assumptions about a
one-to-one relationship between IRQs and vectors here and elsewhere.
Additionally, it seems to me that struct hvm_mirq_dpci_mapping's
digl_list and gmsi fields could really be overlaid, which would yield
significant savings, since this structure always gets instantiated in
the form of arrays dimensioned by d->nr_pirqs (which, as per the above,
could also be the smaller of this and NR_VECTORS).
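
(Roughly - hypothetical layout only, the real struct isn't shown in
this patch and the gmsi member types are guesses; the point is just
that a given machine IRQ is either INTx-routed, using digl_list, or an
MSI, using gmsi, never both, so the two can share storage:

    struct hvm_mirq_dpci_mapping {
        uint32_t flags;                  /* e.g. HVM_IRQ_DPCI_MACH_MSI */
        union {
            struct list_head digl_list;  /* dev/intx/gsi links */
            struct {
                uint32_t gvec;
                uint32_t gflags;
            } gmsi;
        };                               /* anonymous union (GNU C/C11) */
        /* ... */
    };
)
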
This patch will only apply cleanly with the previously sent IRQ-related
cleanups patch applied first.
IA64 build-tested only with the changes to common code.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- 2009-05-19.orig/xen/arch/x86/apic.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/arch/x86/apic.c 2009-05-19 16:40:37.000000000 +0200
@@ -890,35 +890,7 @@ __next:
if (boot_cpu_physical_apicid == -1U)
boot_cpu_physical_apicid = get_apic_id();
-#ifdef CONFIG_X86_IO_APIC
- {
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
- int i;
-
- for (i = 0; i < nr_ioapics; i++) {
- if (smp_found_config) {
- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
- if (!ioapic_phys) {
- printk(KERN_ERR
- "WARNING: bogus zero IO-APIC "
- "address found in MPTABLE, "
- "disabling IO/APIC support!\n");
- smp_found_config = 0;
- skip_ioapic_setup = 1;
- goto fake_ioapic_page;
- }
- } else {
-fake_ioapic_page:
- ioapic_phys = __pa(alloc_xenheap_page());
- clear_page(__va(ioapic_phys));
- }
- set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
- idx++;
- }
- }
-#endif
+ init_ioapic_mappings();
}
/*****************************************************************************
--- 2009-05-19.orig/xen/arch/x86/domain.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/arch/x86/domain.c 2009-05-19 16:40:37.000000000 +0200
@@ -454,6 +454,12 @@ int arch_domain_create(struct domain *d,
share_xen_page_with_guest(
virt_to_page(d->shared_info), d, XENSHARE_writable);
+ d->arch.pirq_vector = xmalloc_array(s16, d->nr_pirqs);
+ if ( !d->arch.pirq_vector )
+ goto fail;
+ memset(d->arch.pirq_vector, 0,
+ d->nr_pirqs * sizeof(*d->arch.pirq_vector));
+
if ( (rc = iommu_domain_init(d)) != 0 )
goto fail;
@@ -488,6 +494,7 @@ int arch_domain_create(struct domain *d,
fail:
d->is_dying = DOMDYING_dead;
+ xfree(d->arch.pirq_vector);
free_xenheap_page(d->shared_info);
if ( paging_initialised )
paging_final_teardown(d);
@@ -523,6 +530,7 @@ void arch_domain_destroy(struct domain *
#endif
free_xenheap_page(d->shared_info);
+ xfree(d->arch.pirq_vector);
}
unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4)
--- 2009-05-19.orig/xen/arch/x86/domain_build.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/arch/x86/domain_build.c 2009-05-19 16:40:37.000000000 +0200
@@ -1000,7 +1000,7 @@ int __init construct_dom0(
/* DOM0 is permitted full I/O capabilities. */
rc |= ioports_permit_access(dom0, 0, 0xFFFF);
rc |= iomem_permit_access(dom0, 0UL, ~0UL);
- rc |= irqs_permit_access(dom0, 0, NR_IRQS-1);
+ rc |= irqs_permit_access(dom0, 0, d->nr_pirqs - 1);
/*
* Modify I/O port access permissions.
--- 2009-05-19.orig/xen/arch/x86/i8259.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/arch/x86/i8259.c 2009-05-19 16:40:37.000000000 +0200
@@ -403,6 +403,9 @@ void __init init_IRQ(void)
set_intr_gate(i, interrupt[i]);
}
+ irq_vector = xmalloc_array(u8, nr_irqs);
+ memset(irq_vector, 0, nr_irqs * sizeof(*irq_vector));
+
for ( i = 0; i < 16; i++ )
{
vector_irq[LEGACY_VECTOR(i)] = i;
--- 2009-05-19.orig/xen/arch/x86/io_apic.c 2009-05-19 17:13:19.000000000 +0200
+++ 2009-05-19/xen/arch/x86/io_apic.c 2009-05-19 16:40:37.000000000 +0200
@@ -71,8 +71,8 @@ int disable_timer_pin_1 __initdata;
* Rough estimation of how many shared IRQs there are, can
* be changed anytime.
*/
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+#define MAX_PLUS_SHARED_IRQS nr_irqs
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + nr_irqs)
/*
* This is performance-critical, we want to do it O(1)
@@ -82,11 +82,10 @@ int disable_timer_pin_1 __initdata;
*/
static struct irq_pin_list {
- int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE] = {
- [0 ... PIN_MAP_SIZE-1].pin = -1
-};
-static int irq_2_pin_free_entry = NR_IRQS;
+ int apic, pin;
+ unsigned int next;
+} *irq_2_pin;
+static unsigned int irq_2_pin_free_entry;
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -663,7 +662,7 @@ static inline int IO_APIC_irq_trigger(in
}
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQS] __read_mostly;
+u8 *irq_vector __read_mostly = (u8 *)(1UL << (BITS_PER_LONG - 1));
static struct hw_interrupt_type ioapic_level_type;
static struct hw_interrupt_type ioapic_edge_type;
@@ -929,7 +928,7 @@ void /*__init*/ __print_IO_APIC(void)
}
printk(KERN_INFO "Using vector-based indexing\n");
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for (i = 0; i < NR_IRQS; i++) {
+ for (i = 0; i < nr_irqs; i++) {
struct irq_pin_list *entry = irq_2_pin + i;
if (entry->pin < 0)
continue;
@@ -961,24 +960,16 @@ void print_IO_APIC_keyhandler(unsigned c
static void __init enable_IO_APIC(void)
{
- union IO_APIC_reg_01 reg_01;
int i8259_apic, i8259_pin;
int i, apic;
unsigned long flags;
/* Initialise dynamic irq_2_pin free list. */
- for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
+ irq_2_pin = xmalloc_array(struct irq_pin_list, PIN_MAP_SIZE);
+ memset(irq_2_pin, 0, nr_irqs * sizeof(*irq_2_pin));
+ for (i = irq_2_pin_free_entry = nr_irqs; i < PIN_MAP_SIZE; i++)
irq_2_pin[i].next = i + 1;
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
- }
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
/* See if any of the pins is in ExtINT mode */
@@ -2174,7 +2165,7 @@ void dump_ioapic_irq_info(void)
unsigned int irq, pin, printed = 0;
unsigned long flags;
- for ( irq = 0; irq < NR_IRQS; irq++ )
+ for ( irq = 0; irq < nr_irqs; irq++ )
{
entry = &irq_2_pin[irq];
if ( entry->pin == -1 )
@@ -2210,3 +2201,55 @@ void dump_ioapic_irq_info(void)
}
}
}
+
+void __init init_ioapic_mappings(void)
+{
+ unsigned long ioapic_phys;
+ unsigned int i, idx = FIX_IO_APIC_BASE_0;
+ union IO_APIC_reg_01 reg_01;
+
+ if ( smp_found_config )
+ nr_irqs = 0;
+ for ( i = 0; i < nr_ioapics; i++ )
+ {
+ if ( smp_found_config )
+ {
+ ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+ if ( !ioapic_phys )
+ {
+ printk(KERN_ERR "WARNING: bogus zero IO-APIC address
"
+ "found in MPTABLE, disabling IO/APIC
support!\n");
+ smp_found_config = 0;
+ skip_ioapic_setup = 1;
+ goto fake_ioapic_page;
+ }
+ }
+ else
+ {
+ fake_ioapic_page:
+ ioapic_phys = __pa(alloc_xenheap_page());
+ clear_page(__va(ioapic_phys));
+ }
+ set_fixmap_nocache(idx, ioapic_phys);
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx), ioapic_phys);
+ idx++;
+
+ if ( smp_found_config )
+ {
+ /* The number of IO-APIC IRQ registers (== #pins): */
+ reg_01.raw = io_apic_read(i, 1);
+ nr_ioapic_registers[i] = reg_01.bits.entries + 1;
+ nr_irqs += nr_ioapic_registers[i];
+ }
+ }
+ if ( !smp_found_config || skip_ioapic_setup || nr_irqs < 16 )
+ nr_irqs = 16;
+ else if ( nr_irqs > PAGE_SIZE * 8 )
+ {
+ /* for PHYSDEVOP_pirq_eoi_gmfn guest assumptions */
+ printk(KERN_WARNING "Limiting number of IRQs found (%u) to
%lu\n",
+ nr_irqs, PAGE_SIZE * 8);
+ nr_irqs = PAGE_SIZE * 8;
+ }
+}
--- 2009-05-19.orig/xen/arch/x86/irq.c 2009-04-02 09:16:26.000000000 +0200
+++ 2009-05-19/xen/arch/x86/irq.c 2009-05-20 11:46:56.000000000 +0200
@@ -25,6 +25,7 @@
int opt_noirqbalance = 0;
boolean_param("noirqbalance", opt_noirqbalance);
+unsigned int __read_mostly nr_irqs = 16;
irq_desc_t irq_desc[NR_VECTORS];
static DEFINE_SPINLOCK(vector_lock);
@@ -78,7 +79,7 @@ int assign_irq_vector(int irq)
static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
unsigned vector;
- BUG_ON(irq >= NR_IRQS);
+ BUG_ON(irq >= nr_irqs && irq != AUTO_ASSIGN_IRQ);
spin_lock(&vector_lock);
@@ -520,7 +521,7 @@ static void __pirq_guest_eoi(struct doma
int pirq_guest_eoi(struct domain *d, int irq)
{
- if ( (irq < 0) || (irq >= NR_IRQS) )
+ if ( (irq < 0) || (irq >= d->nr_pirqs) )
return -EINVAL;
__pirq_guest_eoi(d, irq);
@@ -530,11 +531,11 @@ int pirq_guest_eoi(struct domain *d, int
int pirq_guest_unmask(struct domain *d)
{
- unsigned int irq;
+ unsigned int irq, nr = d->nr_pirqs;
- for ( irq = find_first_bit(d->pirq_mask, NR_IRQS);
- irq < NR_IRQS;
- irq = find_next_bit(d->pirq_mask, NR_IRQS, irq+1) )
+ for ( irq = find_first_bit(d->pirq_mask, nr);
+ irq < nr;
+ irq = find_next_bit(d->pirq_mask, nr, irq+1) )
{
if ( !test_bit(d->pirq_to_evtchn[irq], &shared_info(d, evtchn_mask)) )
__pirq_guest_eoi(d, irq);
@@ -879,15 +880,15 @@ int get_free_pirq(struct domain *d, int
if ( type == MAP_PIRQ_TYPE_GSI )
{
- for ( i = 16; i < NR_IRQS; i++ )
+ for ( i = 16; i < nr_irqs; i++ )
if ( !d->arch.pirq_vector[i] )
break;
- if ( i == NR_IRQS )
+ if ( i == nr_irqs )
return -ENOSPC;
}
else
{
- for ( i = NR_IRQS - 1; i >= 16; i-- )
+ for ( i = d->nr_pirqs - 1; i >= 16; i-- )
if ( !d->arch.pirq_vector[i] )
break;
if ( i == 16 )
@@ -913,7 +914,7 @@ int map_domain_pirq(
if ( !IS_PRIV(current->domain) )
return -EPERM;
- if ( pirq < 0 || pirq >= NR_IRQS || vector < 0 || vector >= NR_VECTORS )
+ if ( pirq < 0 || pirq >= d->nr_pirqs || vector < 0 || vector >= NR_VECTORS )
{
dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or vector
%d\n",
d->domain_id, pirq, vector);
@@ -985,7 +986,7 @@ int unmap_domain_pirq(struct domain *d,
bool_t forced_unbind;
struct msi_desc *msi_desc = NULL;
- if ( (pirq < 0) || (pirq >= NR_IRQS) )
+ if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
return -EINVAL;
if ( !IS_PRIV(current->domain) )
@@ -1057,7 +1058,7 @@ void free_domain_pirqs(struct domain *d)
spin_lock(&pcidevs_lock);
spin_lock(&d->event_lock);
- for ( i = 0; i < NR_IRQS; i++ )
+ for ( i = 0; i < d->nr_pirqs; i++ )
if ( d->arch.pirq_vector[i] > 0 )
unmap_domain_pirq(d, i);
--- 2009-05-19.orig/xen/arch/x86/physdev.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/arch/x86/physdev.c 2009-05-19 16:40:37.000000000 +0200
@@ -55,7 +55,7 @@ static int physdev_map_pirq(struct physd
switch ( map->type )
{
case MAP_PIRQ_TYPE_GSI:
- if ( map->index < 0 || map->index >= NR_IRQS )
+ if ( map->index < 0 || map->index >= nr_irqs )
{
dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n",
d->domain_id, map->index);
@@ -196,7 +196,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( copy_from_guest(&eoi, arg, 1) != 0 )
break;
ret = -EINVAL;
- if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+ if ( eoi.irq < 0 || eoi.irq >= v->domain->nr_pirqs )
break;
if ( v->domain->arch.pirq_eoi_map )
evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
@@ -208,8 +208,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
struct physdev_pirq_eoi_gmfn info;
unsigned long mfn;
- BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
-
ret = -EFAULT;
if ( copy_from_guest(&info, arg, 1) != 0 )
break;
@@ -254,7 +252,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
break;
irq = irq_status_query.irq;
ret = -EINVAL;
- if ( (irq < 0) || (irq >= NR_IRQS) )
+ if ( (irq < 0) || (irq >= v->domain->nr_pirqs) )
break;
irq_status_query.flags = 0;
/*
@@ -346,7 +344,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
irq = irq_op.irq;
ret = -EINVAL;
- if ( (irq < 0) || (irq >= NR_IRQS) )
+ if ( (irq < 0) || (irq >= nr_irqs) )
break;
irq_op.vector = assign_irq_vector(irq);
--- 2009-05-19.orig/xen/common/domain.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/common/domain.c 2009-05-19 16:40:37.000000000 +0200
@@ -8,6 +8,7 @@
#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
+#include <xen/ctype.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
@@ -197,6 +198,16 @@ struct vcpu *alloc_idle_vcpu(unsigned in
return v;
}
+static unsigned int extra_dom0_irqs, extra_domU_irqs = 8;
+static void __init parse_extra_guest_irqs(const char *s)
+{
+ if ( isdigit(*s) )
+ extra_domU_irqs = simple_strtoul(s, &s, 0);
+ if ( *s == ',' && isdigit(*++s) )
+ extra_dom0_irqs = simple_strtoul(s, &s, 0);
+}
+custom_param("extra_guest_irqs", parse_extra_guest_irqs);
+
struct domain *domain_create(
domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
{
@@ -243,9 +254,19 @@ struct domain *domain_create(
d->is_paused_by_controller = 1;
atomic_inc(&d->pause_count);
+ d->nr_pirqs = nr_irqs +
+ (domid ? extra_domU_irqs : extra_dom0_irqs ?: nr_irqs);
if ( evtchn_init(d) != 0 )
goto fail;
init_status |= INIT_evtchn;
+ d->pirq_to_evtchn = xmalloc_array(u16, d->nr_pirqs);
+ d->pirq_mask = xmalloc_array(unsigned long,
+ BITS_TO_LONGS(d->nr_pirqs));
+ if ( !d->pirq_to_evtchn || !d->pirq_mask )
+ goto fail;
+ memset(d->pirq_to_evtchn, 0, d->nr_pirqs * sizeof(*d->pirq_to_evtchn));
+ bitmap_zero(d->pirq_mask, d->nr_pirqs);
if ( grant_table_create(d) != 0 )
goto fail;
@@ -288,7 +309,11 @@ struct domain *domain_create(
if ( init_status & INIT_gnttab )
grant_table_destroy(d);
if ( init_status & INIT_evtchn )
+ {
+ xfree(d->pirq_mask);
+ xfree(d->pirq_to_evtchn);
evtchn_destroy(d);
+ }
if ( init_status & INIT_rangeset )
rangeset_domain_destroy(d);
if ( init_status & INIT_xsm )
--- 2009-05-19.orig/xen/common/domctl.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/common/domctl.c 2009-05-19 16:40:37.000000000 +0200
@@ -774,16 +774,14 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
struct domain *d;
unsigned int pirq = op->u.irq_permission.pirq;
- ret = -EINVAL;
- if ( pirq >= NR_IRQS )
- break;
-
ret = -ESRCH;
d = rcu_lock_domain_by_id(op->domain);
if ( d == NULL )
break;
- if ( op->u.irq_permission.allow_access )
+ if ( pirq >= d->nr_pirqs )
+ ret = -EINVAL;
+ else if ( op->u.irq_permission.allow_access )
ret = irq_permit_access(d, pirq);
else
ret = irq_deny_access(d, pirq);
--- 2009-05-19.orig/xen/common/event_channel.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/common/event_channel.c 2009-05-19 16:40:37.000000000 +0200
@@ -302,7 +302,7 @@ static long evtchn_bind_pirq(evtchn_bind
int port, pirq = bind->pirq;
long rc;
- if ( (pirq < 0) || (pirq >= ARRAY_SIZE(d->pirq_to_evtchn)) )
+ if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
return -EINVAL;
if ( !irq_access_permitted(d, pirq) )
--- 2009-05-19.orig/xen/drivers/passthrough/io.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/drivers/passthrough/io.c 2009-05-20 14:42:07.000000000 +0200
@@ -38,9 +38,10 @@ static void pt_irq_time_out(void *data)
struct dev_intx_gsi_link *digl;
struct hvm_girq_dpci_mapping *girq;
uint32_t device, intx;
- DECLARE_BITMAP(machine_gsi_map, NR_IRQS);
+ unsigned int nr_pirqs = irq_map->dom->nr_pirqs;
+ DECLARE_BITMAP(machine_gsi_map, nr_pirqs);
- bitmap_zero(machine_gsi_map, NR_IRQS);
+ bitmap_zero(machine_gsi_map, nr_pirqs);
spin_lock(&irq_map->dom->event_lock);
@@ -59,9 +60,9 @@ static void pt_irq_time_out(void *data)
hvm_pci_intx_deassert(irq_map->dom, device, intx);
}
- for ( machine_gsi = find_first_bit(machine_gsi_map, NR_IRQS);
- machine_gsi < NR_IRQS;
- machine_gsi = find_next_bit(machine_gsi_map, NR_IRQS,
+ for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs);
+ machine_gsi < nr_pirqs;
+ machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs,
machine_gsi + 1) )
{
clear_bit(machine_gsi, dpci->dirq_mask);
@@ -71,15 +72,23 @@ static void pt_irq_time_out(void *data)
spin_unlock(&irq_map->dom->event_lock);
- for ( machine_gsi = find_first_bit(machine_gsi_map, NR_IRQS);
- machine_gsi < NR_IRQS;
- machine_gsi = find_next_bit(machine_gsi_map, NR_IRQS,
+ for ( machine_gsi = find_first_bit(machine_gsi_map, nr_pirqs);
+ machine_gsi < nr_pirqs;
+ machine_gsi = find_next_bit(machine_gsi_map, nr_pirqs,
machine_gsi + 1) )
{
pirq_guest_eoi(irq_map->dom, machine_gsi);
}
}
+void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
+{
+ xfree(dpci->mirq);
+ xfree(dpci->dirq_mask);
+ xfree(dpci->mapping);
+ xfree(dpci);
+}
+
int pt_irq_create_bind_vtd(
struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
{
@@ -90,7 +99,7 @@ int pt_irq_create_bind_vtd(
struct hvm_girq_dpci_mapping *girq;
int rc, pirq = pt_irq_bind->machine_irq;
- if ( pirq < 0 || pirq >= NR_IRQS )
+ if ( pirq < 0 || pirq >= d->nr_pirqs )
return -EINVAL;
spin_lock(&d->event_lock);
@@ -105,16 +114,33 @@ int pt_irq_create_bind_vtd(
return -ENOMEM;
}
memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
- for ( int i = 0; i < NR_IRQS; i++ )
+ hvm_irq_dpci->mirq = xmalloc_array(struct hvm_mirq_dpci_mapping,
+ d->nr_pirqs);
+ hvm_irq_dpci->dirq_mask = xmalloc_array(unsigned long,
+ BITS_TO_LONGS(d->nr_pirqs));
+ hvm_irq_dpci->mapping = xmalloc_array(unsigned long,
+ BITS_TO_LONGS(d->nr_pirqs));
+ if ( !hvm_irq_dpci->mirq ||
+ !hvm_irq_dpci->dirq_mask ||
+ !hvm_irq_dpci->mapping )
{
+ spin_unlock(&d->event_lock);
+ free_hvm_irq_dpci(hvm_irq_dpci);
+ return -ENOMEM;
+ }
+ memset(hvm_irq_dpci->mirq, 0,
+ d->nr_pirqs * sizeof(*hvm_irq_dpci->mirq));
+ bitmap_zero(hvm_irq_dpci->dirq_mask, d->nr_pirqs);
+ bitmap_zero(hvm_irq_dpci->mapping, d->nr_pirqs);
+ for ( int i = 0; i < d->nr_pirqs; i++ )
INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
+ for ( int i = 0; i < NR_HVM_IRQS; i++ )
INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
- }
if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
{
spin_unlock(&d->event_lock);
- xfree(hvm_irq_dpci);
+ free_hvm_irq_dpci(hvm_irq_dpci);
return -EINVAL;
}
}
@@ -364,7 +390,7 @@ static void __msi_pirq_eoi(struct domain
struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
irq_desc_t *desc;
- if ( ( pirq >= 0 ) && ( pirq < NR_IRQS ) &&
+ if ( ( pirq >= 0 ) && ( pirq < d->nr_pirqs ) &&
test_bit(pirq, hvm_irq_dpci->mapping) &&
( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) )
{
@@ -414,9 +440,9 @@ void hvm_dirq_assist(struct vcpu *v)
if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
return;
- for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
- irq < NR_IRQS;
- irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
+ for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs);
+ irq < d->nr_pirqs;
+ irq = find_next_bit(hvm_irq_dpci->dirq_mask, d->nr_pirqs, irq + 1) )
{
if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
continue;
--- 2009-05-19.orig/xen/drivers/passthrough/pci.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/drivers/passthrough/pci.c 2009-05-19 16:40:37.000000000 +0200
@@ -201,9 +201,9 @@ static void pci_clean_dpci_irqs(struct d
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
- for ( i = find_first_bit(hvm_irq_dpci->mapping, NR_IRQS);
- i < NR_IRQS;
- i = find_next_bit(hvm_irq_dpci->mapping, NR_IRQS, i + 1) )
+ for ( i = find_first_bit(hvm_irq_dpci->mapping, d->nr_pirqs);
+ i < d->nr_pirqs;
i = find_next_bit(hvm_irq_dpci->mapping, d->nr_pirqs, i + 1) )
{
pirq_guest_unbind(d, i);
kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]);
@@ -219,7 +219,7 @@ static void pci_clean_dpci_irqs(struct d
}
d->arch.hvm_domain.irq.dpci = NULL;
- xfree(hvm_irq_dpci);
+ free_hvm_irq_dpci(hvm_irq_dpci);
}
spin_unlock(&d->event_lock);
}
--- 2009-05-19.orig/xen/drivers/passthrough/vtd/x86/vtd.c 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/drivers/passthrough/vtd/x86/vtd.c 2009-05-19 16:40:37.000000000 +0200
@@ -130,9 +130,9 @@ void hvm_dpci_isairq_eoi(struct domain *
return;
}
/* Multiple mirq may be mapped to one isa irq */
- for ( i = find_first_bit(dpci->mapping, NR_IRQS);
- i < NR_IRQS;
- i = find_next_bit(dpci->mapping, NR_IRQS, i + 1) )
+ for ( i = find_first_bit(dpci->mapping, d->nr_pirqs);
+ i < d->nr_pirqs;
+ i = find_next_bit(dpci->mapping, d->nr_pirqs, i + 1) )
{
list_for_each_entry_safe ( digl, tmp,
&dpci->mirq[i].digl_list, list )
--- 2009-05-19.orig/xen/include/asm-x86/config.h 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/include/asm-x86/config.h 2009-05-19 16:40:37.000000000 +0200
@@ -52,12 +52,6 @@
#define NR_CPUS 32
#endif
-#ifdef MAX_PHYS_IRQS
-#define NR_IRQS MAX_PHYS_IRQS
-#else
-#define NR_IRQS 256
-#endif
-
#if defined(__i386__) && (NR_CPUS > 32)
#error "Maximum of 32 physical processors supported by Xen on x86_32"
#endif
--- 2009-05-19.orig/xen/include/asm-x86/domain.h 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/include/asm-x86/domain.h 2009-05-19 16:40:37.000000000 +0200
@@ -263,7 +263,7 @@ struct arch_domain
/* NB. protected by d->event_lock and by irq_desc[vector].lock */
int vector_pirq[NR_VECTORS];
- s16 pirq_vector[NR_IRQS];
+ s16 *pirq_vector;
/* Shared page for notifying that explicit PIRQ EOI is required. */
unsigned long *pirq_eoi_map;
--- 2009-05-19.orig/xen/include/asm-x86/io_apic.h 2009-05-19 17:13:48.000000000 +0200
+++ 2009-05-19/xen/include/asm-x86/io_apic.h 2009-05-19 17:15:22.000000000 +0200
@@ -175,11 +175,14 @@ extern int io_apic_set_pci_routing (int
extern int timer_uses_ioapic_pin_0;
#endif /*CONFIG_ACPI_BOOT*/
+extern void init_ioapic_mappings(void);
+
extern int (*ioapic_renumber_irq)(int ioapic, int irq);
extern void ioapic_suspend(void);
extern void ioapic_resume(void);
#else /* !CONFIG_X86_IO_APIC */
+static inline void init_ioapic_mappings(void) {}
static inline void ioapic_suspend(void) {}
static inline void ioapic_resume(void) {}
#endif
--- 2009-05-19.orig/xen/include/asm-x86/irq.h 2009-04-02 09:16:35.000000000 +0200
+++ 2009-05-19/xen/include/asm-x86/irq.h 2009-05-19 16:40:37.000000000 +0200
@@ -18,7 +18,7 @@
#define vector_to_irq(vec) (vector_irq[vec])
extern int vector_irq[NR_VECTORS];
-extern u8 irq_vector[NR_IRQS];
+extern u8 *irq_vector;
#define platform_legacy_irq(irq) ((irq) < 16)
--- 2009-05-19.orig/xen/include/xen/hvm/irq.h 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/include/xen/hvm/irq.h 2009-05-19 16:40:37.000000000 +0200
@@ -68,21 +68,26 @@ struct hvm_girq_dpci_mapping {
#define NR_ISAIRQS 16
#define NR_LINK 4
+#if defined(__i386__) || defined(__x86_64__)
+# define NR_HVM_IRQS VIOAPIC_NUM_PINS
+#elif defined(__ia64__)
+# define NR_HVM_IRQS VIOSAPIC_NUM_PINS
+#endif
/* Protected by domain's event_lock */
struct hvm_irq_dpci {
/* Machine IRQ to guest device/intx mapping. */
- DECLARE_BITMAP(mapping, NR_IRQS);
- struct hvm_mirq_dpci_mapping mirq[NR_IRQS];
+ unsigned long *mapping;
+ struct hvm_mirq_dpci_mapping *mirq;
+ unsigned long *dirq_mask;
/* Guest IRQ to guest device/intx mapping. */
- struct list_head girq[NR_IRQS];
- uint8_t msi_gvec_pirq[NR_VECTORS];
- DECLARE_BITMAP(dirq_mask, NR_IRQS);
+ struct list_head girq[NR_HVM_IRQS];
+ uint8_t msi_gvec_pirq[0x100];
/* Record of mapped ISA IRQs */
DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
/* Record of mapped Links */
uint8_t link_cnt[NR_LINK];
- struct timer hvm_timer[NR_IRQS];
+ struct timer hvm_timer[NR_VECTORS];
};
/* Modify state of a PCI INTx wire. */
--- 2009-05-19.orig/xen/include/xen/iommu.h 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/include/xen/iommu.h 2009-05-19 16:40:37.000000000 +0200
@@ -91,6 +91,7 @@ struct iommu_flush *iommu_get_flush(stru
void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq);
struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain);
int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci);
+void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci);
#define PT_IRQ_TIME_OUT MILLISECS(8)
#define VTDPREFIX "[VT-D]"
--- 2009-05-19.orig/xen/include/xen/irq.h 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/include/xen/irq.h 2009-05-19 16:40:37.000000000 +0200
@@ -49,6 +49,12 @@ typedef struct hw_interrupt_type hw_irq_
#include <asm/irq.h>
+#ifdef NR_IRQS
+# define nr_irqs NR_IRQS
+#else
+extern unsigned int nr_irqs;
+#endif
+
struct msi_desc;
/*
* This is the "IRQ descriptor", which contains various information
--- 2009-05-19.orig/xen/include/xen/sched.h 2009-05-20 14:40:35.000000000 +0200
+++ 2009-05-19/xen/include/xen/sched.h 2009-05-19 16:40:37.000000000 +0200
@@ -200,8 +200,9 @@ struct domain
* domain's event-channel spinlock. Read accesses can also synchronise on
* the lock, but races don't usually matter.
*/
- u16 pirq_to_evtchn[NR_IRQS];
- DECLARE_BITMAP(pirq_mask, NR_IRQS);
+ unsigned int nr_pirqs;
+ u16 *pirq_to_evtchn;
+ unsigned long *pirq_mask;
/* I/O capabilities (access to IRQs and memory-mapped I/O). */
struct rangeset *iomem_caps;
Keir Fraser
2009-May-20 13:48 UTC
Re: [Xen-devel] [PATCH] x86: eliminate hard-coded NR_IRQS
On 20/05/2009 06:16, "Jan Beulich" <JBeulich@novell.com> wrote:> ... splitting it into global nr_irqs (determined at boot time) and per- > domain nr_pirqs (derived from nr_irqs and a possibly command line > specified value, which probably should later become a per-domain config > setting). > > This has the (desirable imo) side effect of reducing the size of struct > hvm_irq_dpci from requiring an order-3 page to order-2 (on x86-64), > which nevertheless still is too large.Erm, well I''m not sure about this patch. Your single stated motivation, to reduce some struct sizes, could also be addressed by replacing arrays with other really simple alternatives like hash tables or radix trees. Or replacing in-place arrays with pointers to arrays (which obviously you do in some places out of necessity in your patch, but that could equally be done without making nr_irqs/nr_pirqs dynamic). Does it make sense to have nr_pirqs > nr_irqs? Are you thinking of a shared irq being exposed to a guest as non-shared multiple pirqs? Basically I thought the setting of nr_pirqs based on nr_irqs plus some command-line values looks a bit bizarre and kludgy and I''m not sure what the usage scenario is there. -- Keir _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel
Jan Beulich
2009-May-20 14:33 UTC
Re: [Xen-devel] [PATCH] x86: eliminate hard-coded NR_IRQS
>>> Keir Fraser <keir.fraser@eu.citrix.com> 20.05.09 15:48 >>>
> On 20/05/2009 06:16, "Jan Beulich" <JBeulich@novell.com> wrote:
>
>> ... splitting it into global nr_irqs (determined at boot time) and per-
>> domain nr_pirqs (derived from nr_irqs and a possibly command line
>> specified value, which probably should later become a per-domain config
>> setting).
>>
>> This has the (desirable imo) side effect of reducing the size of struct
>> hvm_irq_dpci from requiring an order-3 page to order-2 (on x86-64),
>> which nevertheless still is too large.
>
> Erm, well I'm not sure about this patch. Your single stated motivation, to
> reduce some struct sizes, could also be addressed by replacing arrays with
> other really simple alternatives like hash tables or radix trees. Or
> replacing in-place arrays with pointers to arrays (which obviously you do
> in some places out of necessity in your patch, but that could equally be
> done without making nr_irqs/nr_pirqs dynamic).

No, that wasn't the motivation - as I said this is just a nice side effect.

> Does it make sense to have nr_pirqs > nr_irqs? Are you thinking of a shared

Yes - nr_irqs is only what comes through the IO-APIC. nr_pirqs includes MSI
sources (which only require vectors, and with vector and irq spaces now
properly separated, including them in the irq space is no longer needed).
Thus nr_pirqs generally *must* be larger than nr_irqs.

> irq being exposed to a guest as non-shared multiple pirqs? Basically I
> thought the setting of nr_pirqs based on nr_irqs plus some command-line
> values looks a bit bizarre and kludgy and I'm not sure what the usage
> scenario is there.

Jan