thr3ads.net - Xen devel - [PATCH 0/7] xen/arm: run on real hardware [Oct 2012]

If this information is useful, please help other people find it:
Share via:

Stefano Stabellini

2012-Oct-24 15:03 UTC

[PATCH 0/7] xen/arm: run on real hardware

Hi all,
this is a collection of fixes that I wrote trying to run Xen on a real
Versatile Express Cortex A15 machine.


Stefano Stabellini (7):
      xen/arm: fix rank calculation in vgic_vcpu_inject_irq
      xen/arm: setup the fixmap in head.S
      pl011: set baud and clock_hz to the right defaults for Versatile Express
      xen/arm: set the SMP bit in the ACTLR register
      xen/arm: wake up secondary cpus
      xen/arm: flush D-cache and I-cache when appropriate
      xen/arm: get the number of cpus from device tree

 xen/arch/arm/early_printk.c     |    5 +--
 xen/arch/arm/gic.c              |    4 +--
 xen/arch/arm/gic.h              |    2 +-
 xen/arch/arm/head.S             |   55 ++++++++++++++++++++++++++++----------
 xen/arch/arm/mm.c               |   16 ++++++++++-
 xen/arch/arm/mode_switch.S      |   31 ++++++++++++++++++++++
 xen/arch/arm/setup.c            |    5 +--
 xen/arch/arm/smpboot.c          |    2 +
 xen/arch/arm/vgic.c             |    2 +-
 xen/common/device_tree.c        |   20 ++++++++++++++
 xen/drivers/char/pl011.c        |    4 +-
 xen/include/asm-arm/page.h      |   14 ++++++++++
 xen/include/asm-arm/processor.h |   30 +++++++++++++++++++++
 xen/include/xen/device_tree.h   |    1 +
 14 files changed, 161 insertions(+), 30 deletions(-)

Cheers,

Stefano

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Each rank holds 32 irqs, so we should divide by 32 rather than by 4.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/arch/arm/vgic.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 3c3983f..5eae61c 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -577,7 +577,7 @@ struct pending_irq *irq_to_pending(struct vcpu *v, unsigned
int irq)
 
 void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq, int virtual)
 {
-    int idx = irq >> 2, byte = irq & 0x3;
+    int idx = irq / 32, byte = irq & 0x3;
     uint8_t priority;
     struct vgic_irq_rank *rank = vgic_irq_rank(v, 8, idx);
     struct pending_irq *iter, *n = irq_to_pending(v, irq);
-- 
1.7.2.5

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 2/7] xen/arm: setup the fixmap in head.S

Setup the fixmap mapping directly in head.S rather than having a
temporary mapping only to re-do it later in C.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/arch/arm/early_printk.c |    5 ++---
 xen/arch/arm/head.S         |   32 +++++++++++++++++++-------------
 xen/arch/arm/mm.c           |    2 +-
 xen/arch/arm/setup.c        |    2 --
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/xen/arch/arm/early_printk.c b/xen/arch/arm/early_printk.c
index 3e51252..bdf4c0e 100644
--- a/xen/arch/arm/early_printk.c
+++ b/xen/arch/arm/early_printk.c
@@ -17,12 +17,11 @@
 
 #ifdef EARLY_UART_ADDRESS
 
-static void __init early_putch(char c)
+void __init early_putch(char c)
 {
     volatile uint32_t *r;
 
-    r = (uint32_t *)((EARLY_UART_ADDRESS & 0x001fffff)
-                     + XEN_VIRT_START + (1 << 21));
+    r = (uint32_t *)(XEN_VIRT_START + (1 << 21));
 
     /* XXX: assuming a PL011 UART. */
     while(*(r + 0x6) & 0x8)
diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
index 9fdeda7..3d01be0 100644
--- a/xen/arch/arm/head.S
+++ b/xen/arch/arm/head.S
@@ -24,6 +24,7 @@
 #define PT_PT  0xe7f /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=1, P=1 */
 #define PT_MEM 0xe7d /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=111, T=0, P=1 */
 #define PT_DEV 0xe71 /* nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=0, P=1 */
+#define PT_DEV_L3 0xe73 /* lev3: nG=1, AF=1, SH=10, AP=01, NS=1, ATTR=100, T=1,
P=1 */
 
 #define PT_UPPER(x) (PT_##x & 0xf00)
 #define PT_LOWER(x) (PT_##x & 0x0ff)
@@ -183,6 +184,16 @@ skip_bss:
 	teq   r12, #0
 	bne   pt_ready
 	
+	/* console fixmap */
+	ldr   r1, =xen_fixmap
+	add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
+	mov   r3, #0
+	lsr   r2, r11, #12
+	lsl   r2, r2, #12            /* 4K aligned paddr of UART */
+	orr   r2, r2, #PT_UPPER(DEV_L3)
+	orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */
+	strd  r2, r3, [r1, #0]       /* Map it in the first fixmap''s slot */
+	
 	/* Build the baseline idle pagetable''s first-level entries */
 	ldr   r1, =xen_second
 	add   r1, r1, r10            /* r1 := paddr (xen_second) */
@@ -205,12 +216,13 @@ skip_bss:
 	ldr   r4, =start
 	lsr   r4, #18                /* Slot for vaddr(start) */
 	strd  r2, r3, [r1, r4]       /* Map Xen there too */
+
 #ifdef EARLY_UART_ADDRESS
-	ldr   r3, =(1<<(54-32))      /* NS for device mapping */
-	lsr   r2, r11, #21
-	lsl   r2, r2, #21            /* 2MB-aligned paddr of UART */
-	orr   r2, r2, #PT_UPPER(DEV)
-	orr   r2, r2, #PT_LOWER(DEV) /* r2:r3 := 2MB dev map including UART */
+	/* xen_fixmap pagetable */
+	ldr r2, =xen_fixmap
+	add r2, r2, r10
+	orr   r2, r2, #PT_UPPER(PT)
+	orr   r2, r2, #PT_LOWER(PT) /* r2:r3 := paddr (xen_fixmap) */
 	add   r4, r4, #8
 	strd  r2, r3, [r1, r4]       /* Map it in the fixmap''s slot */
 #else
@@ -236,13 +248,9 @@ pt_ready:
 	mov   pc, r1                 /* Get a proper vaddr into PC */
 paging:
 
+	
 #ifdef EARLY_UART_ADDRESS
-	/* Recover the UART address in the new address space. */
-	lsl   r11, #11
-	lsr   r11, #11               /* UART base''s offset from 2MB base */
-	adr   r0, start
-	add   r0, r0, #0x200000      /* vaddr of the fixmap''s 2MB slot */
-	add   r11, r11, r0           /* r11 := vaddr (UART base address) */
+	ldr r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
 #endif
 
 	PRINT("- Ready -\r\n")
@@ -261,8 +269,6 @@ paging:
 	mcr   CP32(r0, BPIALL)       /* Flush branch predictor */
 	dsb                          /* Ensure completion of TLB+BP flush */
 	isb
- 	/* Now, the UART is in its proper fixmap address */
-	ldrne r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
 
 	/* Non-boot CPUs report that they''ve got this far */
 	ldr   r0, =ready_cpus
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index b0068d2..d0cd2c9 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -37,7 +37,7 @@ struct domain *dom_xen, *dom_io;
 /* Static start-of-day pagetables that we use before the allocators are up */
 lpae_t xen_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
 lpae_t xen_second[LPAE_ENTRIES*4] __attribute__((__aligned__(4096*4)));
-static lpae_t xen_fixmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
+lpae_t xen_fixmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
 static lpae_t xen_xenmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
 
 /* Non-boot CPUs use this to find the correct pagetables. */
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 7568968..6fbcb81 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -194,9 +194,7 @@ void __init start_xen(unsigned long boot_phys_offset,
     setup_pagetables(boot_phys_offset, get_xen_paddr());
 
 #ifdef EARLY_UART_ADDRESS
-    /* Map the UART */
     /* TODO Need to get device tree or command line for UART address */
-    set_fixmap(FIXMAP_CONSOLE, EARLY_UART_ADDRESS >> PAGE_SHIFT,
DEV_SHARED);
     pl011_init(0, FIXMAP_ADDR(FIXMAP_CONSOLE));
     console_init_preirq();
 #endif
-- 
1.7.2.5

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/drivers/char/pl011.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/xen/drivers/char/pl011.c b/xen/drivers/char/pl011.c
index 6af45aa..6ccb73a 100644
--- a/xen/drivers/char/pl011.c
+++ b/xen/drivers/char/pl011.c
@@ -241,8 +241,8 @@ void __init pl011_init(int index, unsigned long
register_base_address)
 
     uart = &pl011_com[index];
 
-    uart->clock_hz  = 7372800;
-    uart->baud      = 115200;
+    uart->clock_hz  = 0x16e3600;
+    uart->baud      = 38400;
     uart->data_bits = 8;
     uart->parity    = PARITY_NONE;
     uart->stop_bits = 1;
-- 
1.7.2.5

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

From the Cortex A15 manual:

"Enables the processor to receive instruction cache, BTB, and TLB
maintenance
operations from other processors"

...

"You must set this bit before enabling the caches and MMU, or
performing any cache and TLB maintenance operations. The only time
you must clear this bit is during a processor power-down sequence"

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/arch/arm/head.S             |    6 ++++++
 xen/include/asm-arm/processor.h |   30 ++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
index 3d01be0..c784f4d 100644
--- a/xen/arch/arm/head.S
+++ b/xen/arch/arm/head.S
@@ -148,6 +148,12 @@ skip_bss:
 
 	PRINT("- Setting up control registers -\r\n")
 	
+	/* XXX: Cortex A15 specific */
+	/* Set up the SMP bit in ACTLR */
+	mrc   CP32(r0, ACTLR)
+	orr   r0, r0, #(ACTLR_SMP) /* enable SMP bit*/
+	mcr   CP32(r0, ACTLR)
+
 	/* Set up memory attribute type tables */
 	ldr   r0, =MAIR0VAL
 	ldr   r1, =MAIR1VAL
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 3849b23..91a5836 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -34,6 +34,36 @@
 #define SCTLR_A         (1<<1)
 #define SCTLR_M         (1<<0)
 
+/* ACTLR Auxiliary Control Register */
+#define ACTLR_SNOOP_DELAYED      (1<<31)
+#define ACTLR_MAIN_CLOCK         (1<<30)
+#define ACTLR_NEON_CLOCK         (1<<29)
+#define ACTLR_NONCACHE           (1<<24)
+#define ACTLR_INORDER_REQ        (1<<23)
+#define ACTLR_INORDER_LOAD       (1<<22)
+#define ACTLR_L2_TLB_PREFETCH    (1<<21)
+#define ACTLR_L2_IPA_PA_CACHE    (1<<20)
+#define ACTLR_L2_CACHE           (1<<19)
+#define ACTLR_L2_PA_CACHE        (1<<18)
+#define ACTLR_TLB                (1<<17)
+#define ACTLR_STRONGY_ORDERED    (1<<16)
+#define ACTLR_INORDER            (1<<15)
+#define ACTLR_FORCE_LIM          (1<<14)
+#define ACTLR_CP_FLUSH           (1<<13)
+#define ACTLR_CP_PUSH            (1<<12)
+#define ACTLR_LIM                (1<<11)
+#define ACTLR_SER                (1<<10)
+#define ACTLR_OPT                (1<<9)
+#define ACTLR_WFI                (1<<8)
+#define ACTLR_WFE                (1<<7)
+#define ACTLR_SMP                (1<<6)
+#define ACTLR_PLD                (1<<5)
+#define ACTLR_IP                 (1<<4)
+#define ACTLR_MICRO_BTB          (1<<3)
+#define ACTLR_LOOP_ONE           (1<<2)
+#define ACTLR_LOOP_DISABLE       (1<<1)
+#define ACTLR_BTB                (1<<0)
+
 #define SCTLR_BASE        0x00c50078
 #define HSCTLR_BASE       0x30c51878
 
-- 
1.7.2.5

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 5/7] xen/arm: wake up secondary cpus

Secondary cpus are held by the firmware until we send an IPI to them.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/arch/arm/head.S        |    8 ++++++--
 xen/arch/arm/mode_switch.S |   31 +++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
index c784f4d..39c4774 100644
--- a/xen/arch/arm/head.S
+++ b/xen/arch/arm/head.S
@@ -79,12 +79,12 @@ start:
 	beq   boot_cpu               /* If we''re CPU 0, boot now */
 
 	/* Non-boot CPUs wait here to be woken up one at a time. */
-1:	wfe
-	dsb
+1:	dsb
 	ldr   r0, =smp_up_cpu        /* VA of gate */
 	add   r0, r0, r10            /* PA of gate */
 	ldr   r1, [r0]               /* Which CPU is being booted? */
 	teq   r1, r12                /* Is it us? */
+	wfene
 	bne   1b
 
 boot_cpu:
@@ -98,6 +98,10 @@ boot_cpu:
 	PRINT(" booting -\r\n")
 #endif
 
+	/* Wake up secondary cpus */
+	teq   r12, #0
+	bleq  kick_cpus
+
 	/* Check that this CPU has Hyp mode */
 	mrc   CP32(r0, ID_PFR1)
 	and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
diff --git a/xen/arch/arm/mode_switch.S b/xen/arch/arm/mode_switch.S
index 83a682b..39d80e8 100644
--- a/xen/arch/arm/mode_switch.S
+++ b/xen/arch/arm/mode_switch.S
@@ -21,6 +21,37 @@
 #include <asm/page.h>
 #include <asm/asm_defns.h>
 
+/* XXX: Versatile Express specific code */
+/* wake up secondary cpus */
+.globl kick_cpus
+kick_cpus:
+    /* write start paddr to v2m sysreg FLAGSSET register */
+	ldr   r0, =0x1c010000 /* base V2M sysreg MMIO address */
+	add   r1, r0, #0x34   /* SYS_FLAGSCLR register */
+	dsb
+	mov   r2, #0xffffffff
+	str   r2, [r1]
+	dsb
+	add   r1, r0, #0x30   /* SYS_FLAGSSET register */
+	ldr   r2, =start
+	add   r2, r2, r10
+	str   r2, [r1]
+	dsb
+	/* send an interrupt */
+	ldr   r0, =0x2c001000 /* base GICD MMIO address */
+	mov   r1, r0
+	mov   r2, #0x1        /* GICD_CTLR */
+	str   r2, [r1]        /* enable distributor */
+	add   r1, r0, #0xf00  /* GICD_SGIR */
+	mov   r2, #0xfe0000
+	str   r2, [r1]        /* send IPI to everybody */
+	dsb
+	mov   r1, r0
+	mov   r2, #0x0        /* GICD_CTLR */
+	str   r2, [r1]        /* disable distributor */
+	mov   pc, lr
+
+
 /* Get up a CPU into Hyp mode.  Clobbers r0-r3.
  *
  * Expects r12 == CPU number
-- 
1.7.2.5

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

- invalidate tlb after setting WXN;
- flush D-cache and I-cache after relocation;
- flush D-cache after writing to smp_up_cpu;
- flush TLB before changing HTTBR;
- flush I-cache after changing HTTBR;
- flush I-cache and branch predictor after writing Xen text ptes.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/arch/arm/head.S        |    9 +++++++++
 xen/arch/arm/mm.c          |   14 +++++++++++++-
 xen/arch/arm/smpboot.c     |    2 ++
 xen/include/asm-arm/page.h |   14 ++++++++++++++
 4 files changed, 38 insertions(+), 1 deletions(-)

diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
index 39c4774..4c420ac 100644
--- a/xen/arch/arm/head.S
+++ b/xen/arch/arm/head.S
@@ -274,8 +274,15 @@ paging:
 	ldr   r4, =boot_httbr        /* VA of HTTBR value stashed by CPU 0 */
 	add   r4, r4, r10            /* PA of it */
 	ldrd  r4, r5, [r4]           /* Actual value */
+	dsb
+	mcr   CP32(r0, TLBIALLH)     /* Flush hypervisor TLB */
+	dsb
+	isb
 	mcrr  CP64(r4, r5, HTTBR)
+	dsb
+	isb
 	mcr   CP32(r0, TLBIALLH)     /* Flush hypervisor TLB */
+	mcr   CP32(r0, ICIALLU)      /* Flush I-cache */
 	mcr   CP32(r0, BPIALL)       /* Flush branch predictor */
 	dsb                          /* Ensure completion of TLB+BP flush */
 	isb
@@ -288,6 +295,8 @@ paging:
 	teq   r2, #0
 	bne   1b
 	dsb
+	mcr   CP32(r0, DCCMVAC)      /* flush D-Cache */
+	isb
 
 	/* Here, the non-boot CPUs must wait again -- they''re now running on
 	 * the boot CPU''s pagetables so it''s safe for the boot CPU
to
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index d0cd2c9..37e49c8 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -211,6 +211,7 @@ void __init setup_pagetables(unsigned long boot_phys_offset,
paddr_t xen_paddr)
     unsigned long dest_va;
     lpae_t pte, *p;
     int i;
+    unsigned long cacheline_size = READ_CP32(CCSIDR);
 
     /* Map the destination in the boot misc area. */
     dest_va = BOOT_MISC_VIRT_START;
@@ -244,10 +245,18 @@ void __init setup_pagetables(unsigned long
boot_phys_offset, paddr_t xen_paddr)
 
     /* Change pagetables to the copy in the relocated Xen */
     boot_httbr = (unsigned long) xen_pgtable + phys_offset;
+    flush_xen_dcache_va((unsigned long)&boot_httbr);
+    for ( i = 0; i < _end - _start; i += cacheline_size )
+        flush_xen_dcache_va(dest_va + i);
+    flush_xen_text_tlb();
+
     asm volatile (
+        "dsb;"                        /* Ensure visibility of HTTBR
update */
         STORE_CP64(0, HTTBR)          /* Change translation base */
         "dsb;"                        /* Ensure visibility of HTTBR
update */
+        "isb;"
         STORE_CP32(0, TLBIALLH)       /* Flush hypervisor TLB */
+        STORE_CP32(0, ICIALLU)        /* Flush I-cache */
         STORE_CP32(0, BPIALL)         /* Flush branch predictor */
         "dsb;"                        /* Ensure completion of TLB+BP
flush */
         "isb;"
@@ -292,10 +301,11 @@ void __init setup_pagetables(unsigned long
boot_phys_offset, paddr_t xen_paddr)
     pte.pt.table = 1;
     write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
     /* Have changed a mapping used for .text. Flush everything for safety. */
-    flush_xen_text_tlb();
 
     /* From now on, no mapping may be both writable and executable. */
     WRITE_CP32(READ_CP32(HSCTLR) | SCTLR_WXN, HSCTLR);
+    isb();
+    flush_xen_text_tlb();
 }
 
 /* MMU setup for secondary CPUS (which already have paging enabled) */
@@ -303,6 +313,8 @@ void __cpuinit mmu_init_secondary_cpu(void)
 {
     /* From now on, no mapping may be both writable and executable. */
     WRITE_CP32(READ_CP32(HSCTLR) | SCTLR_WXN, HSCTLR);
+    isb();
+    flush_xen_text_tlb();
 }
 
 /* Create Xen''s mappings of memory.
diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c
index c0750c0..767e553 100644
--- a/xen/arch/arm/smpboot.c
+++ b/xen/arch/arm/smpboot.c
@@ -105,6 +105,7 @@ make_cpus_ready(unsigned int max_cpus, unsigned long
boot_phys_offset)
         /* Tell the next CPU to get ready */
         /* TODO: handle boards where CPUIDs are not contiguous */
         *gate = i;
+        flush_xen_dcache_va((unsigned long)gate);
         asm volatile("dsb; isb; sev");
         /* And wait for it to respond */
         while ( ready_cpus < i )
@@ -201,6 +202,7 @@ int __cpu_up(unsigned int cpu)
     /* Unblock the CPU.  It should be waiting in the loop in head.S
      * for an event to arrive when smp_up_cpu matches its cpuid. */
     smp_up_cpu = cpu;
+    flush_xen_dcache_va((unsigned long)&smp_up_cpu);
     asm volatile("dsb; isb; sev");
 
     while ( !cpu_online(cpu) )
diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
index 9511c45..7d70d8c 100644
--- a/xen/include/asm-arm/page.h
+++ b/xen/include/asm-arm/page.h
@@ -232,13 +232,26 @@ static inline lpae_t mfn_to_p2m_entry(unsigned long mfn,
unsigned int mattr)
 static inline void write_pte(lpae_t *p, lpae_t pte)
 {
     asm volatile (
+        "dsb;"
         /* Safely write the entry (STRD is atomic on CPUs that support LPAE) */
         "strd %0, %H0, [%1];"
+        "dsb;"
         /* Push this cacheline to the PoC so the rest of the system sees it. */
         STORE_CP32(1, DCCMVAC)
+        "isb;"
         : : "r" (pte.bits), "r" (p) : "memory");
 }
 
+static inline void flush_xen_dcache_va(unsigned long va)
+{
+    register unsigned long r0 asm ("r0") = va;
+    asm volatile (
+        "dsb;"
+        STORE_CP32(0, DCCMVAC)
+        "isb;"
+        : : "r" (r0) : "memory");
+}
+
 /*
  * Flush all hypervisor mappings from the TLB and branch predictor.
  * This is needed after changing Xen code mappings. 
@@ -249,6 +262,7 @@ static inline void flush_xen_text_tlb(void)
     asm volatile (
         "dsb;"                        /* Ensure visibility of PTE
writes */
         STORE_CP32(0, TLBIALLH)       /* Flush hypervisor TLB */
+        STORE_CP32(0, ICIALLU)        /* Flush I-cache */
         STORE_CP32(0, BPIALL)         /* Flush branch predictor */
         "dsb;"                        /* Ensure completion of TLB+BP
flush */
         "isb;"
-- 
1.7.2.5

Stefano Stabellini

2012-Oct-24 15:03 UTC

head link

[PATCH 7/7] xen/arm: get the number of cpus from device tree

The system might have fewer cpus than the GIC supports.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 xen/arch/arm/gic.c            |    4 +---
 xen/arch/arm/gic.h            |    2 +-
 xen/arch/arm/setup.c          |    3 ++-
 xen/common/device_tree.c      |   20 ++++++++++++++++++++
 xen/include/xen/device_tree.h |    1 +
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 5f06e08..0c6fab9 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -304,7 +304,7 @@ static void __cpuinit gic_hyp_disable(void)
 }
 
 /* Set up the GIC */
-int __init gic_init(void)
+void __init gic_init(void)
 {
     /* XXX FIXME get this from devicetree */
     gic.dbase = GIC_BASE_ADDRESS + GIC_DR_OFFSET;
@@ -328,8 +328,6 @@ int __init gic_init(void)
     gic.lr_mask = 0ULL;
 
     spin_unlock(&gic.lock);
-
-    return gic.cpus;
 }
 
 /* Set up the per-CPU parts of the GIC for a secondary CPU */
diff --git a/xen/arch/arm/gic.h b/xen/arch/arm/gic.h
index b8f9f201..beeaa46 100644
--- a/xen/arch/arm/gic.h
+++ b/xen/arch/arm/gic.h
@@ -146,7 +146,7 @@ extern int gic_route_irq_to_guest(struct domain *d, unsigned
int irq,
 /* Accept an interrupt from the GIC and dispatch its handler */
 extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq);
 /* Bring up the interrupt controller, and report # cpus attached */
-extern int gic_init(void);
+extern void gic_init(void);
 /* Bring up a secondary CPU''s per-CPU GIC interface */
 extern void gic_init_secondary_cpu(void);
 /* Take down a CPU''s per-CPU GIC interface */
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 6fbcb81..26df902 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -189,6 +189,7 @@ void __init start_xen(unsigned long boot_phys_offset,
         + (atag_paddr & ((1 << SECOND_SHIFT) - 1));
     fdt_size = device_tree_early_init(fdt);
 
+    cpus = device_tree_cpus(fdt);
     cmdline_parse(device_tree_bootargs(fdt));
 
     setup_pagetables(boot_phys_offset, get_xen_paddr());
@@ -199,7 +200,7 @@ void __init start_xen(unsigned long boot_phys_offset,
     console_init_preirq();
 #endif
 
-    cpus = gic_init();
+    gic_init();
     make_cpus_ready(cpus, boot_phys_offset);
 
     percpu_init_areas();
diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
index 3d1f0f4..3ae0f4d 100644
--- a/xen/common/device_tree.c
+++ b/xen/common/device_tree.c
@@ -153,6 +153,26 @@ const char *device_tree_bootargs(const void *fdt)
     return prop->data;
 }
 
+int device_tree_cpus(const void *fdt)
+{
+    int node, depth;
+    int cpus = 0;
+
+    node = fdt_path_offset(fdt, "/cpus");
+    if ( node < 0 )
+        return -1;
+    do {
+        node = fdt_next_node(fdt, node, &depth);
+        if ( node < 0 )
+            break;
+        if ( strncmp(fdt_get_name(fdt, node, NULL), "cpu", 3) )
+            break;
+        cpus++;
+    } while ( depth > 0 );
+
+    return cpus;
+}
+
 static int dump_node(const void *fdt, int node, const char *name, int depth,
                      u32 address_cells, u32 size_cells, void *data)
 {
diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
index 4d010c0..46bc0f8 100644
--- a/xen/include/xen/device_tree.h
+++ b/xen/include/xen/device_tree.h
@@ -49,6 +49,7 @@ bool_t device_tree_node_matches(const void *fdt, int node,
const char *match);
 int device_tree_for_each_node(const void *fdt,
                               device_tree_node_func func, void *data);
 const char *device_tree_bootargs(const void *fdt);
+int device_tree_cpus(const void *fdt);
 void device_tree_dump(const void *fdt);
 
 #endif
-- 
1.7.2.5

Tim Deegan

2012-Oct-24 15:27 UTC

head link

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

At 16:03 +0100 on 24 Oct (1351094622), Stefano Stabellini
wrote:> Setup the fixmap mapping directly in head.S rather than having a
> temporary mapping only to re-do it later in C.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
I''m generally against doing anything in asm that could be in C, as
you know, but this actually looks OK.  Nits below.
> @@ -183,6 +184,16 @@ skip_bss:
>  	teq   r12, #0
>  	bne   pt_ready
>  	
> +	/* console fixmap */
> +	ldr   r1, =xen_fixmap
> +	add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
> +	mov   r3, #0
> +	lsr   r2, r11, #12
> +	lsl   r2, r2, #12            /* 4K aligned paddr of UART */
> +	orr   r2, r2, #PT_UPPER(DEV_L3)
> +	orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */
> +	strd  r2, r3, [r1, #0]       /* Map it in the first fixmap''s
slot */
Can you use #(FIXMAP_CONSOLE*8) here rather than hard-coding the 0?
> +	
>  	/* Build the baseline idle pagetable''s first-level entries */
>  	ldr   r1, =xen_second
>  	add   r1, r1, r10            /* r1 := paddr (xen_second) */
> @@ -205,12 +216,13 @@ skip_bss:
>  	ldr   r4, =start
>  	lsr   r4, #18                /* Slot for vaddr(start) */
>  	strd  r2, r3, [r1, r4]       /* Map Xen there too */
> +
>  #ifdef EARLY_UART_ADDRESS
> -	ldr   r3, =(1<<(54-32))      /* NS for device mapping */
> -	lsr   r2, r11, #21
> -	lsl   r2, r2, #21            /* 2MB-aligned paddr of UART */
> -	orr   r2, r2, #PT_UPPER(DEV)
> -	orr   r2, r2, #PT_LOWER(DEV) /* r2:r3 := 2MB dev map including UART */
> +	/* xen_fixmap pagetable */
> +	ldr r2, =xen_fixmap
> +	add r2, r2, r10
Please keep the operands aligned with the code above and below, and maybe
add a comment that r2 := paddr (xen_fixmap).
> +	orr   r2, r2, #PT_UPPER(PT)
> +	orr   r2, r2, #PT_LOWER(PT) /* r2:r3 := paddr (xen_fixmap) */
r2:r3 is a pagetable mapping here, not a paddr.  Also the comment''s not
aligned with the ones above and below.
>  	add   r4, r4, #8
>  	strd  r2, r3, [r1, r4]       /* Map it in the fixmap''s slot */
>  #else
> @@ -236,13 +248,9 @@ pt_ready:
>  	mov   pc, r1                 /* Get a proper vaddr into PC */
>  paging:
>  
> +	
>  #ifdef EARLY_UART_ADDRESS
> -	/* Recover the UART address in the new address space. */
> -	lsl   r11, #11
> -	lsr   r11, #11               /* UART base''s offset from 2MB base
*/
> -	adr   r0, start
> -	add   r0, r0, #0x200000      /* vaddr of the fixmap''s 2MB slot
*/
> -	add   r11, r11, r0           /* r11 := vaddr (UART base address) */
> +	ldr r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
Please leave a comment in to say what this is doing.  Also, the
alignment is wrong again.  Sorry to be picky about this but I want to
keep head.S readable.

Cheers,

Tim.

Stefano Stabellini

2012-Oct-24 15:37 UTC

head link

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

On Wed, 24 Oct 2012, Tim Deegan wrote:> At 16:03 +0100 on 24 Oct (1351094622), Stefano Stabellini wrote:
> > Setup the fixmap mapping directly in head.S rather than having a
> > temporary mapping only to re-do it later in C.
> > 
> > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> 
> I''m generally against doing anything in asm that could be in C, as
> you know, but this actually looks OK.  Nits below.
Thanks for the quick review!

> > @@ -183,6 +184,16 @@ skip_bss:
> >  	teq   r12, #0
> >  	bne   pt_ready
> >  	
> > +	/* console fixmap */
> > +	ldr   r1, =xen_fixmap
> > +	add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
> > +	mov   r3, #0
> > +	lsr   r2, r11, #12
> > +	lsl   r2, r2, #12            /* 4K aligned paddr of UART */
> > +	orr   r2, r2, #PT_UPPER(DEV_L3)
> > +	orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including
UART */
> > +	strd  r2, r3, [r1, #0]       /* Map it in the first
fixmap''s slot */
> 
> Can you use #(FIXMAP_CONSOLE*8) here rather than hard-coding the 0?
Yep

> > +	
> >  	/* Build the baseline idle pagetable''s first-level entries
*/
> >  	ldr   r1, =xen_second
> >  	add   r1, r1, r10            /* r1 := paddr (xen_second) */
> > @@ -205,12 +216,13 @@ skip_bss:
> >  	ldr   r4, =start
> >  	lsr   r4, #18                /* Slot for vaddr(start) */
> >  	strd  r2, r3, [r1, r4]       /* Map Xen there too */
> > +
> >  #ifdef EARLY_UART_ADDRESS
> > -	ldr   r3, =(1<<(54-32))      /* NS for device mapping */
> > -	lsr   r2, r11, #21
> > -	lsl   r2, r2, #21            /* 2MB-aligned paddr of UART */
> > -	orr   r2, r2, #PT_UPPER(DEV)
> > -	orr   r2, r2, #PT_LOWER(DEV) /* r2:r3 := 2MB dev map including UART
*/
> > +	/* xen_fixmap pagetable */
> > +	ldr r2, =xen_fixmap
> > +	add r2, r2, r10
> 
> Please keep the operands aligned with the code above and below, and maybe
> add a comment that r2 := paddr (xen_fixmap).
good point

> > +	orr   r2, r2, #PT_UPPER(PT)
> > +	orr   r2, r2, #PT_LOWER(PT) /* r2:r3 := paddr (xen_fixmap) */
> 
> r2:r3 is a pagetable mapping here, not a paddr.  Also the
comment''s not
> aligned with the ones above and below.
I''ll fix

> >  	add   r4, r4, #8
> >  	strd  r2, r3, [r1, r4]       /* Map it in the fixmap''s slot
*/
> >  #else
> > @@ -236,13 +248,9 @@ pt_ready:
> >  	mov   pc, r1                 /* Get a proper vaddr into PC */
> >  paging:
> >  
> > +	
> >  #ifdef EARLY_UART_ADDRESS
> > -	/* Recover the UART address in the new address space. */
> > -	lsl   r11, #11
> > -	lsr   r11, #11               /* UART base''s offset from 2MB
base */
> > -	adr   r0, start
> > -	add   r0, r0, #0x200000      /* vaddr of the fixmap''s 2MB
slot */
> > -	add   r11, r11, r0           /* r11 := vaddr (UART base address) */
> > +	ldr r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
> 
> Please leave a comment in to say what this is doing.  Also, the
> alignment is wrong again.  Sorry to be picky about this but I want to
> keep head.S readable.
You have my complete support on code style issues and comments in
assembly code!

Tim Deegan

2012-Oct-24 15:38 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

At 16:03 +0100 on 24 Oct (1351094625), Stefano Stabellini
wrote:> Secondary cpus are held by the firmware until we send an IPI to them.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  xen/arch/arm/head.S        |    8 ++++++--
>  xen/arch/arm/mode_switch.S |   31 +++++++++++++++++++++++++++++++
>  2 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> index c784f4d..39c4774 100644
> --- a/xen/arch/arm/head.S
> +++ b/xen/arch/arm/head.S
> @@ -79,12 +79,12 @@ start:
>  	beq   boot_cpu               /* If we''re CPU 0, boot now */
>  
>  	/* Non-boot CPUs wait here to be woken up one at a time. */
> -1:	wfe
> -	dsb
> +1:	dsb
>  	ldr   r0, =smp_up_cpu        /* VA of gate */
>  	add   r0, r0, r10            /* PA of gate */
>  	ldr   r1, [r0]               /* Which CPU is being booted? */
>  	teq   r1, r12                /* Is it us? */
> +	wfene
Shouldn''t that be wf_i_ne?
>  	bne   1b
>  
>  boot_cpu:
> @@ -98,6 +98,10 @@ boot_cpu:
>  	PRINT(" booting -\r\n")
>  #endif
>  
> +	/* Wake up secondary cpus */
> +	teq   r12, #0
> +	bleq  kick_cpus
> +
>  	/* Check that this CPU has Hyp mode */
>  	mrc   CP32(r0, ID_PFR1)
>  	and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
> diff --git a/xen/arch/arm/mode_switch.S b/xen/arch/arm/mode_switch.S
> index 83a682b..39d80e8 100644
> --- a/xen/arch/arm/mode_switch.S
> +++ b/xen/arch/arm/mode_switch.S
> @@ -21,6 +21,37 @@
>  #include <asm/page.h>
>  #include <asm/asm_defns.h>
>  
> +/* XXX: Versatile Express specific code */
> +/* wake up secondary cpus */
> +.globl kick_cpus
> +kick_cpus:
> +    /* write start paddr to v2m sysreg FLAGSSET register */
> +	ldr   r0, =0x1c010000 /* base V2M sysreg MMIO address */
> +	add   r1, r0, #0x34   /* SYS_FLAGSCLR register */
> +	dsb
> +	mov   r2, #0xffffffff
> +	str   r2, [r1]
Why not str r2, [r0, #0x34] and save the explicit add (and similarly below)?
> +	dsb
> +	add   r1, r0, #0x30   /* SYS_FLAGSSET register */
> +	ldr   r2, =start
> +	add   r2, r2, r10
> +	str   r2, [r1]
> +	dsb
> +	/* send an interrupt */
> +	ldr   r0, =0x2c001000 /* base GICD MMIO address */
> +	mov   r1, r0
> +	mov   r2, #0x1        /* GICD_CTLR */
> +	str   r2, [r1]        /* enable distributor */
> +	add   r1, r0, #0xf00  /* GICD_SGIR */
These GIGD_* addresses and constants are already defined in config.h and
gic.h; can you reuse those definitions?

Tim.
> +	mov   r2, #0xfe0000
> +	str   r2, [r1]        /* send IPI to everybody */
> +	dsb
> +	mov   r1, r0
> +	mov   r2, #0x0        /* GICD_CTLR */
> +	str   r2, [r1]        /* disable distributor */
> +	mov   pc, lr
> +
> +
>  /* Get up a CPU into Hyp mode.  Clobbers r0-r3.
>   *
>   * Expects r12 == CPU number
> -- 
> 1.7.2.5
>

Tim Deegan

2012-Oct-24 15:59 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 16:03 +0100 on 24 Oct (1351094626), Stefano Stabellini
wrote:> - invalidate tlb after setting WXN;
> - flush D-cache and I-cache after relocation;
> - flush D-cache after writing to smp_up_cpu;
> - flush TLB before changing HTTBR;
> - flush I-cache after changing HTTBR;
> - flush I-cache and branch predictor after writing Xen text ptes.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> @@ -244,10 +245,18 @@ void __init setup_pagetables(unsigned long
boot_phys_offset, paddr_t xen_paddr)
>  
>      /* Change pagetables to the copy in the relocated Xen */
>      boot_httbr = (unsigned long) xen_pgtable + phys_offset;
> +    flush_xen_dcache_va((unsigned long)&boot_httbr);
> +    for ( i = 0; i < _end - _start; i += cacheline_size )
> +        flush_xen_dcache_va(dest_va + i);
> +    flush_xen_text_tlb();
> +
>      asm volatile (
> +        "dsb;"                        /* Ensure visibility of
HTTBR update */
That comment should be changed -- this dsb is to make sure all the PT
changes are completed, right?
>          STORE_CP64(0, HTTBR)          /* Change translation base */
>          "dsb;"                        /* Ensure visibility of
HTTBR update */
> +        "isb;"
>          STORE_CP32(0, TLBIALLH)       /* Flush hypervisor TLB */
> +        STORE_CP32(0, ICIALLU)        /* Flush I-cache */
>          STORE_CP32(0, BPIALL)         /* Flush branch predictor */
>          "dsb;"                        /* Ensure completion of
TLB+BP flush */
>          "isb;"
> diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> index 9511c45..7d70d8c 100644
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -232,13 +232,26 @@ static inline lpae_t mfn_to_p2m_entry(unsigned long
mfn, unsigned int mattr)
>  static inline void write_pte(lpae_t *p, lpae_t pte)
>  {
>      asm volatile (
> +        "dsb;"
I guess this is to make sure all writes that used the old mapping have
completed?  Can you add a comment?
>          /* Safely write the entry (STRD is atomic on CPUs that support
LPAE) */
>          "strd %0, %H0, [%1];"
> +        "dsb;"
>          /* Push this cacheline to the PoC so the rest of the system sees
it. */
>          STORE_CP32(1, DCCMVAC)
> +        "isb;"
This is for code modifications, right?  I think we can drop it, and have
all the paths that modify text mappings do explicit isb()s there -- the
vast majority of PTE writes don''t need it. 
>          : : "r" (pte.bits), "r" (p) :
"memory");
>  }
>  
> +static inline void flush_xen_dcache_va(unsigned long va)
Three of the four users of this function cast their arguments from
pointer types - maybe it should take a void * instead?.
> +{
> +    register unsigned long r0 asm ("r0") = va;
I don''t think this is necessary - why not just pass va directly to the
inline asm?  We don''t care what register it''s in (and if we
did I''m not
convinced this would guarantee it was r0).
> +    asm volatile (
> +        "dsb;"
> +        STORE_CP32(0, DCCMVAC)
> +        "isb;"
> +        : : "r" (r0) : "memory");
Does this need a ''memory'' clobber?  Can we get away with just
saying it
consumes *va as an input?  All we need to be sure of is that the
particular thing we''re flushing has been written out; no need to stop
any other optimizations.

I guess it might need to be re-cast as a macro so the compiler knows how
big *va is?

Tim.
> +}
> +
>  /*
>   * Flush all hypervisor mappings from the TLB and branch predictor.
>   * This is needed after changing Xen code mappings. 
> @@ -249,6 +262,7 @@ static inline void flush_xen_text_tlb(void)
>      asm volatile (
>          "dsb;"                        /* Ensure visibility of
PTE writes */
>          STORE_CP32(0, TLBIALLH)       /* Flush hypervisor TLB */
> +        STORE_CP32(0, ICIALLU)        /* Flush I-cache */
>          STORE_CP32(0, BPIALL)         /* Flush branch predictor */
>          "dsb;"                        /* Ensure completion of
TLB+BP flush */
>          "isb;"
> -- 
> 1.7.2.5
>

Stefano Stabellini

2012-Oct-24 15:59 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Wed, 24 Oct 2012, Tim Deegan wrote:> At 16:03 +0100 on 24 Oct (1351094625), Stefano Stabellini wrote:
> > Secondary cpus are held by the firmware until we send an IPI to them.
> > 
> > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> > ---
> >  xen/arch/arm/head.S        |    8 ++++++--
> >  xen/arch/arm/mode_switch.S |   31 +++++++++++++++++++++++++++++++
> >  2 files changed, 37 insertions(+), 2 deletions(-)
> > 
> > diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> > index c784f4d..39c4774 100644
> > --- a/xen/arch/arm/head.S
> > +++ b/xen/arch/arm/head.S
> > @@ -79,12 +79,12 @@ start:
> >  	beq   boot_cpu               /* If we''re CPU 0, boot now */
> >  
> >  	/* Non-boot CPUs wait here to be woken up one at a time. */
> > -1:	wfe
> > -	dsb
> > +1:	dsb
> >  	ldr   r0, =smp_up_cpu        /* VA of gate */
> >  	add   r0, r0, r10            /* PA of gate */
> >  	ldr   r1, [r0]               /* Which CPU is being booted? */
> >  	teq   r1, r12                /* Is it us? */
> > +	wfene
> 
> Shouldn''t that be wf_i_ne?
Nope. We need wfe here, in fact the corresponding code is
xen/arch/arm/smpboot.c:make_cpus_ready that uses sev to wake the other
cpus up. The code running wfi on secondary cpus is actually in the
firmware.

> >  	bne   1b
> >  
> >  boot_cpu:
> > @@ -98,6 +98,10 @@ boot_cpu:
> >  	PRINT(" booting -\r\n")
> >  #endif
> >  
> > +	/* Wake up secondary cpus */
> > +	teq   r12, #0
> > +	bleq  kick_cpus
> > +
> >  	/* Check that this CPU has Hyp mode */
> >  	mrc   CP32(r0, ID_PFR1)
> >  	and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
> > diff --git a/xen/arch/arm/mode_switch.S b/xen/arch/arm/mode_switch.S
> > index 83a682b..39d80e8 100644
> > --- a/xen/arch/arm/mode_switch.S
> > +++ b/xen/arch/arm/mode_switch.S
> > @@ -21,6 +21,37 @@
> >  #include <asm/page.h>
> >  #include <asm/asm_defns.h>
> >  
> > +/* XXX: Versatile Express specific code */
> > +/* wake up secondary cpus */
> > +.globl kick_cpus
> > +kick_cpus:
> > +    /* write start paddr to v2m sysreg FLAGSSET register */
> > +	ldr   r0, =0x1c010000 /* base V2M sysreg MMIO address */
> > +	add   r1, r0, #0x34   /* SYS_FLAGSCLR register */
> > +	dsb
> > +	mov   r2, #0xffffffff
> > +	str   r2, [r1]
> 
> Why not str r2, [r0, #0x34] and save the explicit add (and similarly
below)?
Good idea

> > +	dsb
> > +	add   r1, r0, #0x30   /* SYS_FLAGSSET register */
> > +	ldr   r2, =start
> > +	add   r2, r2, r10
> > +	str   r2, [r1]
> > +	dsb
> > +	/* send an interrupt */
> > +	ldr   r0, =0x2c001000 /* base GICD MMIO address */
> > +	mov   r1, r0
> > +	mov   r2, #0x1        /* GICD_CTLR */
> > +	str   r2, [r1]        /* enable distributor */
> > +	add   r1, r0, #0xf00  /* GICD_SGIR */
> 
> These GIGD_* addresses and constants are already defined in config.h and
> gic.h; can you reuse those definitions?
Yep

Tim Deegan

2012-Oct-24 16:02 UTC

head link

Re: [PATCH 0/7] xen/arm: run on real hardware

At 16:03 +0100 on 24 Oct (1351094597), Stefano Stabellini
wrote:> Hi all,
> this is a collection of fixes that I wrote trying to run Xen on a real
> Versatile Express Cortex A15 machine.
Congratulations on getting it running on real hardware!  That''s great
progress.

I''ve replied to 2, 5 and 6.  For 1, 3, 4 and 7,
Acked-by: Tim Deegan <tim@xen.org>

Cheers,

Tim.

Tim Deegan

2012-Oct-24 16:05 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

At 16:59 +0100 on 24 Oct (1351097986), Stefano Stabellini
wrote:> On Wed, 24 Oct 2012, Tim Deegan wrote:
> > At 16:03 +0100 on 24 Oct (1351094625), Stefano Stabellini wrote:
> > > Secondary cpus are held by the firmware until we send an IPI to
them.
> > > 
> > > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> > > ---
> > >  xen/arch/arm/head.S        |    8 ++++++--
> > >  xen/arch/arm/mode_switch.S |   31
+++++++++++++++++++++++++++++++
> > >  2 files changed, 37 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> > > index c784f4d..39c4774 100644
> > > --- a/xen/arch/arm/head.S
> > > +++ b/xen/arch/arm/head.S
> > > @@ -79,12 +79,12 @@ start:
> > >  	beq   boot_cpu               /* If we''re CPU 0, boot
now */
> > >  
> > >  	/* Non-boot CPUs wait here to be woken up one at a time. */
> > > -1:	wfe
> > > -	dsb
> > > +1:	dsb
> > >  	ldr   r0, =smp_up_cpu        /* VA of gate */
> > >  	add   r0, r0, r10            /* PA of gate */
> > >  	ldr   r1, [r0]               /* Which CPU is being booted? */
> > >  	teq   r1, r12                /* Is it us? */
> > > +	wfene
> > 
> > Shouldn''t that be wf_i_ne?
> 
> Nope. We need wfe here, in fact the corresponding code is
> xen/arch/arm/smpboot.c:make_cpus_ready that uses sev to wake the other
> cpus up. The code running wfi on secondary cpus is actually in the
> firmware.
Ah, I see - thanks.

Tim.

Ian Campbell

2012-Oct-24 16:05 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Wed, 2012-10-24 at 16:59 +0100, Tim Deegan wrote:> > +    asm volatile (
> > +        "dsb;"
> > +        STORE_CP32(0, DCCMVAC)
> > +        "isb;"
> > +        : : "r" (r0) : "memory");
> 
> Does this need a ''memory'' clobber?  Can we get away with
just saying it
> consumes *va as an input?  All we need to be sure of is that the
> particular thing we''re flushing has been written out; no need to
stop
> any other optimizations.
> 
> I guess it might need to be re-cast as a macro so the compiler knows how
> big *va is?
Does it matter that this flushes the cache line containing va, which
might be larger than whatever type va is?

Tim Deegan

2012-Oct-24 16:17 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 17:05 +0100 on 24 Oct (1351098349), Ian Campbell
wrote:> On Wed, 2012-10-24 at 16:59 +0100, Tim Deegan wrote:
> > > +    asm volatile (
> > > +        "dsb;"
> > > +        STORE_CP32(0, DCCMVAC)
> > > +        "isb;"
> > > +        : : "r" (r0) : "memory");
> > 
> > Does this need a ''memory'' clobber?  Can we get away
with just saying it
> > consumes *va as an input?  All we need to be sure of is that the
> > particular thing we''re flushing has been written out; no need
to stop
> > any other optimizations.
> > 
> > I guess it might need to be re-cast as a macro so the compiler knows
how
> > big *va is?
> 
> Does it matter that this flushes the cache line containing va, which
> might be larger than whatever type va is?
That depends why you''re flushing. :)  From this CPU''s PoV the
flush
should have no effect at all, so the things to worry about are:

 - the write of the thing we''re trying to flush gets moved by the
   compiler so it''s after the flush.   That case is handled by using an
   input memory operand of the right size.
 - something else on the cacheline that''s written after the flush gets
   hoisted and written before.  It seems like anything that''s relying
on
   that (?!) is on very thin ice already and ought to be using (at least)
   explicit memory barriers of its own.

Another interesting case is where we''re flushing a large area by
cachelines (like the new loop after relocation), so effectively we
don''t
know the size of the input operand (a.k.a. 1 cacheline) at compile time.
But there I think a single explicit barrier before the loop isn''t too
much
to ask.  We could break that and the loop out into a separate function.

Tim.

Stefano Stabellini

2012-Oct-24 17:35 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Wed, 24 Oct 2012, Tim Deegan wrote:> At 16:03 +0100 on 24 Oct (1351094626), Stefano Stabellini wrote:
> > - invalidate tlb after setting WXN;
> > - flush D-cache and I-cache after relocation;
> > - flush D-cache after writing to smp_up_cpu;
> > - flush TLB before changing HTTBR;
> > - flush I-cache after changing HTTBR;
> > - flush I-cache and branch predictor after writing Xen text ptes.
> > 
> > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> 
> > @@ -244,10 +245,18 @@ void __init setup_pagetables(unsigned long
boot_phys_offset, paddr_t xen_paddr)
> >  
> >      /* Change pagetables to the copy in the relocated Xen */
> >      boot_httbr = (unsigned long) xen_pgtable + phys_offset;
> > +    flush_xen_dcache_va((unsigned long)&boot_httbr);
> > +    for ( i = 0; i < _end - _start; i += cacheline_size )
> > +        flush_xen_dcache_va(dest_va + i);
> > +    flush_xen_text_tlb();
> > +
> >      asm volatile (
> > +        "dsb;"                        /* Ensure visibility
of HTTBR update */
> 
> That comment should be changed -- this dsb is to make sure all the PT
> changes are completed, right?
The comment is certainly wrong. I actually think that this dsb is
not necessary, thanks to flush_xen_text_tlb.

> >          STORE_CP64(0, HTTBR)          /* Change translation base */
> >          "dsb;"                        /* Ensure visibility
of HTTBR update */
> > +        "isb;"
> >          STORE_CP32(0, TLBIALLH)       /* Flush hypervisor TLB */
> > +        STORE_CP32(0, ICIALLU)        /* Flush I-cache */
> >          STORE_CP32(0, BPIALL)         /* Flush branch predictor */
> >          "dsb;"                        /* Ensure completion
of TLB+BP flush */
> >          "isb;"
> 
> > diff --git a/xen/include/asm-arm/page.h b/xen/include/asm-arm/page.h
> > index 9511c45..7d70d8c 100644
> > --- a/xen/include/asm-arm/page.h
> > +++ b/xen/include/asm-arm/page.h
> > @@ -232,13 +232,26 @@ static inline lpae_t mfn_to_p2m_entry(unsigned
long mfn, unsigned int mattr)
> >  static inline void write_pte(lpae_t *p, lpae_t pte)
> >  {
> >      asm volatile (
> > +        "dsb;"
> 
> I guess this is to make sure all writes that used the old mapping have
> completed?  Can you add a comment?
Yes, that is exactly the reason. I''ll add a comment.

> >          /* Safely write the entry (STRD is atomic on CPUs that
support LPAE) */
> >          "strd %0, %H0, [%1];"
> > +        "dsb;"
> >          /* Push this cacheline to the PoC so the rest of the system
sees it. */
> >          STORE_CP32(1, DCCMVAC)
> > +        "isb;"
> 
> This is for code modifications, right?  I think we can drop it, and have
> all the paths that modify text mappings do explicit isb()s there -- the
> vast majority of PTE writes don''t need it. 
Yes. Thinking twice about it we should have a dsb there instead, for data
mappings too, otherwise we can''t be sure that the DCCMVAC is completed
before returning.

> >          : : "r" (pte.bits), "r" (p) :
"memory");
> >  }
> >  
> > +static inline void flush_xen_dcache_va(unsigned long va)
> 
> Three of the four users of this function cast their arguments from
> pointer types - maybe it should take a void * instead?.
I can do that

> > +{
> > +    register unsigned long r0 asm ("r0") = va;
> 
> I don''t think this is necessary - why not just pass va directly to
the
> inline asm?  We don''t care what register it''s in (and if
we did I''m not
> convinced this would guarantee it was r0).
> 
> > +    asm volatile (
> > +        "dsb;"
> > +        STORE_CP32(0, DCCMVAC)
> > +        "isb;"
> > +        : : "r" (r0) : "memory");
> 
> Does this need a ''memory'' clobber?  Can we get away with
just saying it
> consumes *va as an input?  All we need to be sure of is that the
> particular thing we''re flushing has been written out; no need to
stop
> any other optimizations.
you are right on both points

> I guess it might need to be re-cast as a macro so the compiler knows how
> big *va is?
I don''t think it is necessary, after all the size of a register has to
be the same of a virtual address

Ian Campbell

2012-Oct-25 09:27 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini
wrote:> Each rank holds 32 irqs, so we should divide by 32 rather than by 4.
I think this is wrong, because idx isn''t used in the way your reasoning
implies, when we use it we assume 8 bits-per-interrupt in the register
not 1.

The accessor function vgix_irq_rank is couched in terms of
GICD_<FOO><n>, which is convenient where we are emulating register
accesses but is very confusing in this one function where we aren''t
thinking in terms of registers!

vgic_irq_rank(v, 8, idx) is saying "find me the rank containing
GICD_<FOO><idx> where FOO has 8 bits per interrupt". Since 4
lots of 8
bits goes into 32 we therefore need to divide by 4. This makes sense if
you consider that FOO here is PRIORITY and that GICD_PRIORITY0 controls
irq 0..3, GICD_PRIORITY1 irq 4..7 etc.

With idx = irq >> 2 you get:

IRQ00: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 0
IRQ01: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 1
IRQ02: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 2
IRQ03: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 3
IRQ04: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE = 0
IRQ05: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE = 1
[...]
IRQ30: idx = 07 REG_RANK_NR(08, 07) = 0, REG_RANK_INDEX(08, 07) = 7, BYTE = 2
IRQ31: idx = 07 REG_RANK_NR(08, 07) = 0, REG_RANK_INDEX(08, 07) = 7, BYTE = 3
IRQ32: idx = 08 REG_RANK_NR(08, 08) = 1, REG_RANK_INDEX(08, 08) = 0, BYTE = 0
IRQ33: idx = 08 REG_RANK_NR(08, 08) = 1, REG_RANK_INDEX(08, 08) = 0, BYTE = 1
[...]
IRQ62: idx = 15 REG_RANK_NR(08, 15) = 1, REG_RANK_INDEX(08, 15) = 7, BYTE = 2
IRQ63: idx = 15 REG_RANK_NR(08, 15) = 1, REG_RANK_INDEX(08, 15) = 7, BYTE = 3
IRQ64: idx = 16 REG_RANK_NR(08, 16) = 2, REG_RANK_INDEX(08, 16) = 0, BYTE = 0
IRQ65: idx = 16 REG_RANK_NR(08, 16) = 2, REG_RANK_INDEX(08, 16) = 0, BYTE = 1
[...]

IOW interrupts 0..31 are in rank 0, 32..63 are rank 1 etc, which is correct

With idx = irq / 32 you instead get:

IRQ00: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 0
IRQ01: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 1
IRQ02: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 2
IRQ03: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 3
IRQ04: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 0
IRQ05: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 1
[...]
IRQ30: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 2
IRQ31: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE = 3
IRQ32: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE = 0
IRQ33: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE = 1
[...]
IRQ62: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE = 2
IRQ63: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE = 3
IRQ64: idx = 02 REG_RANK_NR(08, 02) = 0, REG_RANK_INDEX(08, 02) = 2, BYTE = 0
IRQ65: idx = 02 REG_RANK_NR(08, 02) = 0, REG_RANK_INDEX(08, 02) = 2, BYTE = 1
[...]
IRQ255: idx = 07 REG_RANK_NR(08, 07) = 0, REG_RANK_INDEX(08, 07) = 7, BYTE = 3
IRQ256: idx = 08 REG_RANK_NR(08, 08) = 1, REG_RANK_INDEX(08, 08) = 0, BYTE = 0

Here allinterrupts up to 255 are in rank 0, which is wrong.

For idx = irq / 32 you would need to use REG_RANK_NR(1, idx) not (8,
idx). (and you''d need to trivially fix REG_RANK_NR to handle b == 1).

Perhaps the way to think of it is not as "/ 32" but rather as
">> 5".
Since REG_RANK_NR(8, ) is effectively >> 3 when combined with the
existing >> 2 we get >> 5 overall.

If you change it as you have done then you are doing >> 5 *and* >>
3,
i.e. >> 8 aka "/ 256" -- which explains why interrupts up to 255
end up
in rank 0 with your change.

Perhaps this could be made clearer by renaming vgic_irq_rank to
vgic_register_rank? Then perhaps as a helper:
#define VGIC_IRQ_RANK(v, irq) vgic_register_rank(v, 1, irq/32) for the
case of looking up a rank from an irq rather than a register offset?

Ian.

Ian Campbell

2012-Oct-25 09:33 UTC

head link

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini
wrote:> @@ -183,6 +184,16 @@ skip_bss:
>         teq   r12, #0
>         bne   pt_ready
>         
> +       /* console fixmap */
> +       ldr   r1, =xen_fixmap
> +       add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
> +       mov   r3, #0
> +       lsr   r2, r11, #12
> +       lsl   r2, r2, #12            /* 4K aligned paddr of UART */
> +       orr   r2, r2, #PT_UPPER(DEV_L3)
> +       orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including
UART */
> +       strd  r2, r3, [r1, #0]       /* Map it in the first
fixmap''s slot */
> +       
>         /* Build the baseline idle pagetable''s first-level entries
*/
>         ldr   r1, =xen_second
>         add   r1, r1, r10            /* r1 := paddr (xen_second) */
[...]> @@ -205,12 +216,13 @@ skip_bss:
>  	ldr   r4, =start
>  	lsr   r4, #18                /* Slot for vaddr(start) */
>  	strd  r2, r3, [r1, r4]       /* Map Xen there too */
> +
>  #ifdef EARLY_UART_ADDRESS
> -	ldr   r3, =(1<<(54-32))      /* NS for device mapping */
> -	lsr   r2, r11, #21
> -	lsl   r2, r2, #21            /* 2MB-aligned paddr of UART */
> -	orr   r2, r2, #PT_UPPER(DEV)
> -	orr   r2, r2, #PT_LOWER(DEV) /* r2:r3 := 2MB dev map including UART */
> +	/* xen_fixmap pagetable */
> +	ldr r2, =xen_fixmap
> +	add r2, r2, r10
> +	orr   r2, r2, #PT_UPPER(PT)
> +	orr   r2, r2, #PT_LOWER(PT) /* r2:r3 := paddr (xen_fixmap) */
We should setup the fixmap infrastructure even if !EARLY_UART_ADDRESS,
unless we set it up again later?

As it stands it looks like you setup the generic fixmap mapping iff
EARLY_UART_ADDRESS (second hunk above) but setup the UART mapping within
that unconditionally (first hunk) which is exactly backwards, isn''t it
?

Ian.

Ian Campbell

2012-Oct-25 09:37 UTC

head link

Re: [PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

Can we get these (or at least the clock_hz) from DTB?

On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini
wrote:> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  xen/drivers/char/pl011.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/drivers/char/pl011.c b/xen/drivers/char/pl011.c
> index 6af45aa..6ccb73a 100644
> --- a/xen/drivers/char/pl011.c
> +++ b/xen/drivers/char/pl011.c
> @@ -241,8 +241,8 @@ void __init pl011_init(int index, unsigned long
register_base_address)
>  
>      uart = &pl011_com[index];
>  
> -    uart->clock_hz  = 7372800;
> -    uart->baud      = 115200;
> +    uart->clock_hz  = 0x16e3600;
> +    uart->baud      = 38400;
>      uart->data_bits = 8;
>      uart->parity    = PARITY_NONE;
>      uart->stop_bits = 1;

Ian Campbell

2012-Oct-25 09:52 UTC

head link

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini
wrote:> From the Cortex A15 manual:
> 
> "Enables the processor to receive instruction cache, BTB, and TLB
maintenance
> operations from other processors"
> 
> ...
> 
> "You must set this bit before enabling the caches and MMU, or
> performing any cache and TLB maintenance operations. The only time
> you must clear this bit is during a processor power-down sequence"
Is it considered a bug that the firmware doesn''t do this?
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  xen/arch/arm/head.S             |    6 ++++++
>  xen/include/asm-arm/processor.h |   30 ++++++++++++++++++++++++++++++
>  2 files changed, 36 insertions(+), 0 deletions(-)
> 
> diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> index 3d01be0..c784f4d 100644
> --- a/xen/arch/arm/head.S
> +++ b/xen/arch/arm/head.S
> @@ -148,6 +148,12 @@ skip_bss:
>  
>  	PRINT("- Setting up control registers -\r\n")
>  	
> +	/* XXX: Cortex A15 specific */
We probably need some sort of early proc info keyed of the processor id
(like Linux) has so we can push this processor specific stuff into the
right places.
> +	/* Set up the SMP bit in ACTLR */
> +	mrc   CP32(r0, ACTLR)
> +	orr   r0, r0, #(ACTLR_SMP) /* enable SMP bit*/
> +	mcr   CP32(r0, ACTLR)
Linux does this IFF it isn''t already set for some reason:
#ifdef CONFIG_SMP
        ALT_SMP(mrc     p15, 0, r0, c1, c0, 1)
        ALT_UP(mov      r0, #(1 << 6))          @ fake it for UP
        tst     r0, #(1 << 6)                   @ SMP/nAMP mode enabled?
        orreq   r0, r0, #(1 << 6)               @ Enable SMP/nAMP mode
        orreq   r0, r0, r10                     @ Enable CPU-specific SMP bits
        mcreq   p15, 0, r0, c1, c0, 1
#endif

Might just relate to the "fake it up for UP" thing I suppose.
> +
>  	/* Set up memory attribute type tables */
>  	ldr   r0, =MAIR0VAL
>  	ldr   r1, =MAIR1VAL
> diff --git a/xen/include/asm-arm/processor.h
b/xen/include/asm-arm/processor.h
> index 3849b23..91a5836 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -34,6 +34,36 @@
>  #define SCTLR_A         (1<<1)
>  #define SCTLR_M         (1<<0)
>  
> +/* ACTLR Auxiliary Control Register */
> +#define ACTLR_SNOOP_DELAYED      (1<<31)
If these are CortexA15 specific then I think they ought to be
ACTLR_CA15_FOO or something. And possibly in a new processor-ca15.h
header instead.
> +#define ACTLR_MAIN_CLOCK         (1<<30)
> +#define ACTLR_NEON_CLOCK         (1<<29)
> +#define ACTLR_NONCACHE           (1<<24)
> +#define ACTLR_INORDER_REQ        (1<<23)
> +#define ACTLR_INORDER_LOAD       (1<<22)
> +#define ACTLR_L2_TLB_PREFETCH    (1<<21)
> +#define ACTLR_L2_IPA_PA_CACHE    (1<<20)
> +#define ACTLR_L2_CACHE           (1<<19)
> +#define ACTLR_L2_PA_CACHE        (1<<18)
> +#define ACTLR_TLB                (1<<17)
> +#define ACTLR_STRONGY_ORDERED    (1<<16)
> +#define ACTLR_INORDER            (1<<15)
> +#define ACTLR_FORCE_LIM          (1<<14)
> +#define ACTLR_CP_FLUSH           (1<<13)
> +#define ACTLR_CP_PUSH            (1<<12)
> +#define ACTLR_LIM                (1<<11)
> +#define ACTLR_SER                (1<<10)
> +#define ACTLR_OPT                (1<<9)
> +#define ACTLR_WFI                (1<<8)
> +#define ACTLR_WFE                (1<<7)
> +#define ACTLR_SMP                (1<<6)
> +#define ACTLR_PLD                (1<<5)
> +#define ACTLR_IP                 (1<<4)
> +#define ACTLR_MICRO_BTB          (1<<3)
> +#define ACTLR_LOOP_ONE           (1<<2)
> +#define ACTLR_LOOP_DISABLE       (1<<1)
> +#define ACTLR_BTB                (1<<0)
> +
>  #define SCTLR_BASE        0x00c50078
>  #define HSCTLR_BASE       0x30c51878
>

Ian Campbell

2012-Oct-25 09:59 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini
wrote:> Secondary cpus are held by the firmware until we send an IPI to them.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> ---
>  xen/arch/arm/head.S        |    8 ++++++--
>  xen/arch/arm/mode_switch.S |   31 +++++++++++++++++++++++++++++++
>  2 files changed, 37 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> index c784f4d..39c4774 100644
> --- a/xen/arch/arm/head.S
> +++ b/xen/arch/arm/head.S
> @@ -79,12 +79,12 @@ start:
>  	beq   boot_cpu               /* If we''re CPU 0, boot now */
>  
>  	/* Non-boot CPUs wait here to be woken up one at a time. */
> -1:	wfe
> -	dsb
> +1:	dsb
>  	ldr   r0, =smp_up_cpu        /* VA of gate */
>  	add   r0, r0, r10            /* PA of gate */
>  	ldr   r1, [r0]               /* Which CPU is being booted? */
>  	teq   r1, r12                /* Is it us? */
> +	wfene
>  	bne   1b
This is a semi-independent bug fix relating to unnecessarily waiting
(and perhaps blocking indefinitely) on the first iteration of the loop?
>  
>  boot_cpu:
> @@ -98,6 +98,10 @@ boot_cpu:
>  	PRINT(" booting -\r\n")
>  #endif
>  
> +	/* Wake up secondary cpus */
> +	teq   r12, #0
> +	bleq  kick_cpus
Does this have to be done this early? Couldn''t we defer it to C land
where it would be easier to isolate the processor/platform specific
behaviour?
>  	/* Check that this CPU has Hyp mode */
>  	mrc   CP32(r0, ID_PFR1)
>  	and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
> diff --git a/xen/arch/arm/mode_switch.S b/xen/arch/arm/mode_switch.S
> index 83a682b..39d80e8 100644
> --- a/xen/arch/arm/mode_switch.S
> +++ b/xen/arch/arm/mode_switch.S
> @@ -21,6 +21,37 @@
>  #include <asm/page.h>
>  #include <asm/asm_defns.h>
>  
> +/* XXX: Versatile Express specific code */
> +/* wake up secondary cpus */
> +.globl kick_cpus
> +kick_cpus:
My understanding was the mode_switch.S was intended to be a place to
hold the hacks and fixups required because there is no firmware on the
models and/or to address short comings in the firmware. This kick
function is a normal/expected part of booting on a vexpress so I don''t
think it especially belongs here.
> +    /* write start paddr to v2m sysreg FLAGSSET register */
> +	ldr   r0, =0x1c010000 /* base V2M sysreg MMIO address */
> +	add   r1, r0, #0x34   /* SYS_FLAGSCLR register */
As well as Tim''s comment about using the GICD_* registers you should
define names for the V2M registers somewhere too.
> +	dsb
> +	mov   r2, #0xffffffff
> +	str   r2, [r1]
> +	dsb
> +	add   r1, r0, #0x30   /* SYS_FLAGSSET register */
> +	ldr   r2, =start
> +	add   r2, r2, r10
> +	str   r2, [r1]
> +	dsb
> +	/* send an interrupt */
> +	ldr   r0, =0x2c001000 /* base GICD MMIO address */
> +	mov   r1, r0
> +	mov   r2, #0x1        /* GICD_CTLR */
> +	str   r2, [r1]        /* enable distributor */
> +	add   r1, r0, #0xf00  /* GICD_SGIR */
> +	mov   r2, #0xfe0000
> +	str   r2, [r1]        /* send IPI to everybody */
> +	dsb
> +	mov   r1, r0
> +	mov   r2, #0x0        /* GICD_CTLR */
> +	str   r2, [r1]        /* disable distributor */
> +	mov   pc, lr
> +
> +
>  /* Get up a CPU into Hyp mode.  Clobbers r0-r3.
>   *
>   * Expects r12 == CPU number

Ian Campbell

2012-Oct-25 10:02 UTC

head link

Re: [PATCH 7/7] xen/arm: get the number of cpus from device tree

> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
> index 3d1f0f4..3ae0f4d 100644
> --- a/xen/common/device_tree.c
> +++ b/xen/common/device_tree.c
> @@ -153,6 +153,26 @@ const char *device_tree_bootargs(const void *fdt)
>      return prop->data;
>  }
>  
> +int device_tree_cpus(const void *fdt)
> +{
> +    int node, depth;
> +    int cpus = 0;
> +
> +    node = fdt_path_offset(fdt, "/cpus");
> +    if ( node < 0 )
> +        return -1;
With this we will stumble on with cpus == -1 (~4 billion, which would
make a nice system ;-) ). Perhaps we should either assume UP (with a big
printk) or BUG on failure here?
> +    do {
> +        node = fdt_next_node(fdt, node, &depth);
> +        if ( node < 0 )
> +            break;
> +        if ( strncmp(fdt_get_name(fdt, node, NULL), "cpu", 3) )
> +            break;
If we find /cpus/foo (for any foo != "cpu*") then we stop counting,
rather than just ignoring that node?
> +        cpus++;
> +    } while ( depth > 0 );
> +
> +    return cpus;
> +}
> +
>  static int dump_node(const void *fdt, int node, const char *name, int
depth,
>                       u32 address_cells, u32 size_cells, void *data)
>  {
> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
> index 4d010c0..46bc0f8 100644
> --- a/xen/include/xen/device_tree.h
> +++ b/xen/include/xen/device_tree.h
> @@ -49,6 +49,7 @@ bool_t device_tree_node_matches(const void *fdt, int
node, const char *match);
>  int device_tree_for_each_node(const void *fdt,
>                                device_tree_node_func func, void *data);
>  const char *device_tree_bootargs(const void *fdt);
> +int device_tree_cpus(const void *fdt);
>  void device_tree_dump(const void *fdt);
>  
>  #endif

Stefano Stabellini

2012-Oct-25 10:57 UTC

head link

Re: [PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

On Thu, 25 Oct 2012, Ian Campbell wrote:> Can we get these (or at least the clock_hz) from DTB?
Unfortunately we cannot.

> On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> > ---
> >  xen/drivers/char/pl011.c |    4 ++--
> >  1 files changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/xen/drivers/char/pl011.c b/xen/drivers/char/pl011.c
> > index 6af45aa..6ccb73a 100644
> > --- a/xen/drivers/char/pl011.c
> > +++ b/xen/drivers/char/pl011.c
> > @@ -241,8 +241,8 @@ void __init pl011_init(int index, unsigned long
register_base_address)
> >  
> >      uart = &pl011_com[index];
> >  
> > -    uart->clock_hz  = 7372800;
> > -    uart->baud      = 115200;
> > +    uart->clock_hz  = 0x16e3600;
> > +    uart->baud      = 38400;
> >      uart->data_bits = 8;
> >      uart->parity    = PARITY_NONE;
> >      uart->stop_bits = 1;
> 
> 
>

Stefano Stabellini

2012-Oct-25 11:00 UTC

head link

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

On Thu, 25 Oct 2012, Ian Campbell wrote:> On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > @@ -183,6 +184,16 @@ skip_bss:
> >         teq   r12, #0
> >         bne   pt_ready
> >         
> > +       /* console fixmap */
> > +       ldr   r1, =xen_fixmap
> > +       add   r1, r1, r10            /* r1 := paddr (xen_fixmap) */
> > +       mov   r3, #0
> > +       lsr   r2, r11, #12
> > +       lsl   r2, r2, #12            /* 4K aligned paddr of UART */
> > +       orr   r2, r2, #PT_UPPER(DEV_L3)
> > +       orr   r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map
including UART */
> > +       strd  r2, r3, [r1, #0]       /* Map it in the first
fixmap''s slot */
> > +       
> >         /* Build the baseline idle pagetable''s first-level
entries */
> >         ldr   r1, =xen_second
> >         add   r1, r1, r10            /* r1 := paddr (xen_second) */
> [...]
> > @@ -205,12 +216,13 @@ skip_bss:
> >  	ldr   r4, =start
> >  	lsr   r4, #18                /* Slot for vaddr(start) */
> >  	strd  r2, r3, [r1, r4]       /* Map Xen there too */
> > +
> >  #ifdef EARLY_UART_ADDRESS
> > -	ldr   r3, =(1<<(54-32))      /* NS for device mapping */
> > -	lsr   r2, r11, #21
> > -	lsl   r2, r2, #21            /* 2MB-aligned paddr of UART */
> > -	orr   r2, r2, #PT_UPPER(DEV)
> > -	orr   r2, r2, #PT_LOWER(DEV) /* r2:r3 := 2MB dev map including UART
*/
> > +	/* xen_fixmap pagetable */
> > +	ldr r2, =xen_fixmap
> > +	add r2, r2, r10
> > +	orr   r2, r2, #PT_UPPER(PT)
> > +	orr   r2, r2, #PT_LOWER(PT) /* r2:r3 := paddr (xen_fixmap) */
> 
> We should setup the fixmap infrastructure even if !EARLY_UART_ADDRESS,
> unless we set it up again later?
> 
> As it stands it looks like you setup the generic fixmap mapping iff
> EARLY_UART_ADDRESS (second hunk above) but setup the UART mapping within
> that unconditionally (first hunk) which is exactly backwards,
isn''t it ?
Yes, you are right, they should be inverted.

Ian Campbell

2012-Oct-25 11:00 UTC

head link

Re: [PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

On Thu, 2012-10-25 at 11:57 +0100, Stefano Stabellini
wrote:> On Thu, 25 Oct 2012, Ian Campbell wrote:
> > Can we get these (or at least the clock_hz) from DTB?
> 
> Unfortunately we cannot.
I suppose we ought to get them from the pl011 equivalent of the com1
command line parameter then.

Ian.

Stefano Stabellini

2012-Oct-25 11:57 UTC

head link

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

On Thu, 25 Oct 2012, Ian Campbell wrote:> On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > From the Cortex A15 manual:
> > 
> > "Enables the processor to receive instruction cache, BTB, and TLB
maintenance
> > operations from other processors"
> > 
> > ...
> > 
> > "You must set this bit before enabling the caches and MMU, or
> > performing any cache and TLB maintenance operations. The only time
> > you must clear this bit is during a processor power-down
sequence"
> 
> Is it considered a bug that the firmware doesn''t do this?
Why would it be? You can run fairly complicated pieces of software
without caches or MMU.

> > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> > ---
> >  xen/arch/arm/head.S             |    6 ++++++
> >  xen/include/asm-arm/processor.h |   30 ++++++++++++++++++++++++++++++
> >  2 files changed, 36 insertions(+), 0 deletions(-)
> > 
> > diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> > index 3d01be0..c784f4d 100644
> > --- a/xen/arch/arm/head.S
> > +++ b/xen/arch/arm/head.S
> > @@ -148,6 +148,12 @@ skip_bss:
> >  
> >  	PRINT("- Setting up control registers -\r\n")
> >  	
> > +	/* XXX: Cortex A15 specific */
> 
> We probably need some sort of early proc info keyed of the processor id
> (like Linux) has so we can push this processor specific stuff into the
> right places.
Indeed.
I wrote something simple that should be OK until we get more than
~10 different processors.

> > +	/* Set up the SMP bit in ACTLR */
> > +	mrc   CP32(r0, ACTLR)
> > +	orr   r0, r0, #(ACTLR_SMP) /* enable SMP bit*/
> > +	mcr   CP32(r0, ACTLR)
> 
> Linux does this IFF it isn''t already set for some reason:
> #ifdef CONFIG_SMP
>         ALT_SMP(mrc     p15, 0, r0, c1, c0, 1)
>         ALT_UP(mov      r0, #(1 << 6))          @ fake it for UP
>         tst     r0, #(1 << 6)                   @ SMP/nAMP mode
enabled?
>         orreq   r0, r0, #(1 << 6)               @ Enable SMP/nAMP
mode
>         orreq   r0, r0, r10                     @ Enable CPU-specific SMP
bits
>         mcreq   p15, 0, r0, c1, c0, 1
> #endif
> 
> Might just relate to the "fake it up for UP" thing I suppose.
Considering that we don''t have a UP config option for Xen (I am aware
of), there is no need for us to do that, I think.

> > +
> >  	/* Set up memory attribute type tables */
> >  	ldr   r0, =MAIR0VAL
> >  	ldr   r1, =MAIR1VAL
> > diff --git a/xen/include/asm-arm/processor.h
b/xen/include/asm-arm/processor.h
> > index 3849b23..91a5836 100644
> > --- a/xen/include/asm-arm/processor.h
> > +++ b/xen/include/asm-arm/processor.h
> > @@ -34,6 +34,36 @@
> >  #define SCTLR_A         (1<<1)
> >  #define SCTLR_M         (1<<0)
> >  
> > +/* ACTLR Auxiliary Control Register */
> > +#define ACTLR_SNOOP_DELAYED      (1<<31)
> 
> If these are CortexA15 specific then I think they ought to be
> ACTLR_CA15_FOO or something. And possibly in a new processor-ca15.h
> header instead.
OK

Ian Campbell

2012-Oct-25 12:04 UTC

head link

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

On Thu, 2012-10-25 at 12:57 +0100, Stefano Stabellini
wrote:> On Thu, 25 Oct 2012, Ian Campbell wrote:
> > On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > > From the Cortex A15 manual:
> > > 
> > > "Enables the processor to receive instruction cache, BTB,
and TLB maintenance
> > > operations from other processors"
> > > 
> > > ...
> > > 
> > > "You must set this bit before enabling the caches and MMU,
or
> > > performing any cache and TLB maintenance operations. The only
time
> > > you must clear this bit is during a processor power-down
sequence"
> > 
> > Is it considered a bug that the firmware doesn''t do this?
> 
> Why would it be? You can run fairly complicated pieces of software
> without caches or MMU.
True, I guess I just considered that not setting the SMP bit on the
second processor when the firmware starts it seemed a bit odd. If you
aren''t caches/MMU/etc them then setting the bit is pretty much a NOP
(or
maybe it isn''t?).
> 
> 
> > > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> > > ---
> > >  xen/arch/arm/head.S             |    6 ++++++
> > >  xen/include/asm-arm/processor.h |   30
++++++++++++++++++++++++++++++
> > >  2 files changed, 36 insertions(+), 0 deletions(-)
> > > 
> > > diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> > > index 3d01be0..c784f4d 100644
> > > --- a/xen/arch/arm/head.S
> > > +++ b/xen/arch/arm/head.S
> > > @@ -148,6 +148,12 @@ skip_bss:
> > >  
> > >  	PRINT("- Setting up control registers -\r\n")
> > >  	
> > > +	/* XXX: Cortex A15 specific */
> > 
> > We probably need some sort of early proc info keyed of the processor
id
> > (like Linux) has so we can push this processor specific stuff into the
> > right places.
> 
> Indeed.
> I wrote something simple that should be OK until we get more than
> ~10 different processors.
> 
> 
> > > +	/* Set up the SMP bit in ACTLR */
> > > +	mrc   CP32(r0, ACTLR)
> > > +	orr   r0, r0, #(ACTLR_SMP) /* enable SMP bit*/
> > > +	mcr   CP32(r0, ACTLR)
> > 
> > Linux does this IFF it isn''t already set for some reason:
> > #ifdef CONFIG_SMP
> >         ALT_SMP(mrc     p15, 0, r0, c1, c0, 1)
> >         ALT_UP(mov      r0, #(1 << 6))          @ fake it for UP
> >         tst     r0, #(1 << 6)                   @ SMP/nAMP mode
enabled?
> >         orreq   r0, r0, #(1 << 6)               @ Enable
SMP/nAMP mode
> >         orreq   r0, r0, r10                     @ Enable CPU-specific
SMP bits
> >         mcreq   p15, 0, r0, c1, c0, 1
> > #endif
> > 
> > Might just relate to the "fake it up for UP" thing I
suppose.
> 
> Considering that we don''t have a UP config option for Xen (I am
aware
> of), there is no need for us to do that, I think.
> 
> 
> > > +
> > >  	/* Set up memory attribute type tables */
> > >  	ldr   r0, =MAIR0VAL
> > >  	ldr   r1, =MAIR1VAL
> > > diff --git a/xen/include/asm-arm/processor.h
b/xen/include/asm-arm/processor.h
> > > index 3849b23..91a5836 100644
> > > --- a/xen/include/asm-arm/processor.h
> > > +++ b/xen/include/asm-arm/processor.h
> > > @@ -34,6 +34,36 @@
> > >  #define SCTLR_A         (1<<1)
> > >  #define SCTLR_M         (1<<0)
> > >  
> > > +/* ACTLR Auxiliary Control Register */
> > > +#define ACTLR_SNOOP_DELAYED      (1<<31)
> > 
> > If these are CortexA15 specific then I think they ought to be
> > ACTLR_CA15_FOO or something. And possibly in a new processor-ca15.h
> > header instead.
> 
> OK

Stefano Stabellini

2012-Oct-25 17:45 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Thu, 25 Oct 2012, Ian Campbell wrote:> On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > Secondary cpus are held by the firmware until we send an IPI to them.
> > 
> > Signed-off-by: Stefano Stabellini
<stefano.stabellini@eu.citrix.com>
> > ---
> >  xen/arch/arm/head.S        |    8 ++++++--
> >  xen/arch/arm/mode_switch.S |   31 +++++++++++++++++++++++++++++++
> >  2 files changed, 37 insertions(+), 2 deletions(-)
> > 
> > diff --git a/xen/arch/arm/head.S b/xen/arch/arm/head.S
> > index c784f4d..39c4774 100644
> > --- a/xen/arch/arm/head.S
> > +++ b/xen/arch/arm/head.S
> > @@ -79,12 +79,12 @@ start:
> >  	beq   boot_cpu               /* If we''re CPU 0, boot now */
> >  
> >  	/* Non-boot CPUs wait here to be woken up one at a time. */
> > -1:	wfe
> > -	dsb
> > +1:	dsb
> >  	ldr   r0, =smp_up_cpu        /* VA of gate */
> >  	add   r0, r0, r10            /* PA of gate */
> >  	ldr   r1, [r0]               /* Which CPU is being booted? */
> >  	teq   r1, r12                /* Is it us? */
> > +	wfene
> >  	bne   1b
> 
> This is a semi-independent bug fix relating to unnecessarily waiting
> (and perhaps blocking indefinitely) on the first iteration of the loop?
yes

> >  
> >  boot_cpu:
> > @@ -98,6 +98,10 @@ boot_cpu:
> >  	PRINT(" booting -\r\n")
> >  #endif
> >  
> > +	/* Wake up secondary cpus */
> > +	teq   r12, #0
> > +	bleq  kick_cpus
> 
> Does this have to be done this early? Couldn''t we defer it to C
land
> where it would be easier to isolate the processor/platform specific
> behaviour?
Yes, it does because we need to send an interrupt to cpus running in
secure mode, so this has to happen before we drop off secure state and we
enter hypervisor state.

> >  	/* Check that this CPU has Hyp mode */
> >  	mrc   CP32(r0, ID_PFR1)
> >  	and   r0, r0, #0xf000        /* Bits 12-15 define virt extensions */
> > diff --git a/xen/arch/arm/mode_switch.S b/xen/arch/arm/mode_switch.S
> > index 83a682b..39d80e8 100644
> > --- a/xen/arch/arm/mode_switch.S
> > +++ b/xen/arch/arm/mode_switch.S
> > @@ -21,6 +21,37 @@
> >  #include <asm/page.h>
> >  #include <asm/asm_defns.h>
> >  
> > +/* XXX: Versatile Express specific code */
> > +/* wake up secondary cpus */
> > +.globl kick_cpus
> > +kick_cpus:
> 
> My understanding was the mode_switch.S was intended to be a place to
> hold the hacks and fixups required because there is no firmware on the
> models and/or to address short comings in the firmware. This kick
> function is a normal/expected part of booting on a vexpress so I
don''t
> think it especially belongs here.
I have created a processor.S file for processor specific initializations
(see ACTLR), maybe I can move it there.

> > +    /* write start paddr to v2m sysreg FLAGSSET register */
> > +	ldr   r0, =0x1c010000 /* base V2M sysreg MMIO address */
> > +	add   r1, r0, #0x34   /* SYS_FLAGSCLR register */
> 
> As well as Tim''s comment about using the GICD_* registers you
should
> define names for the V2M registers somewhere too.
sure

Ian Campbell

2012-Oct-26 07:30 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Thu, 2012-10-25 at 18:45 +0100, Stefano Stabellini
wrote:> > >  
> > >  boot_cpu:
> > > @@ -98,6 +98,10 @@ boot_cpu:
> > >  	PRINT(" booting -\r\n")
> > >  #endif
> > >  
> > > +	/* Wake up secondary cpus */
> > > +	teq   r12, #0
> > > +	bleq  kick_cpus
> > 
> > Does this have to be done this early? Couldn''t we defer it to
C land
> > where it would be easier to isolate the processor/platform specific
> > behaviour?
> 
> Yes, it does because we need to send an interrupt to cpus running in
> secure mode, so this has to happen before we drop off secure state and we
> enter hypervisor state.
Hrm, so maybe this stuff does belong in mode_switch.S after all?

Or is there perhaps some register (e.g. in the GIC?) which would allow
non-secure hyp mode to sent an event to a processor in secure monitor
mode? Or are secondary CPUs actually spinning in secure supervisor mode?

I guess this works in Linux because the boot CPU is in *secure* kernel
mode and that is allowed to send events to other secure modes? That''s a
further argument that this is related to the firmware not bringing us up
in Hyp / NS mode and therefore that the fix should be in mode_switch.S.
> > >  	/* Check that this CPU has Hyp mode */
> > >  	mrc   CP32(r0, ID_PFR1)
> > >  	and   r0, r0, #0xf000        /* Bits 12-15 define virt
extensions */
> > > diff --git a/xen/arch/arm/mode_switch.S
b/xen/arch/arm/mode_switch.S
> > > index 83a682b..39d80e8 100644
> > > --- a/xen/arch/arm/mode_switch.S
> > > +++ b/xen/arch/arm/mode_switch.S
> > > @@ -21,6 +21,37 @@
> > >  #include <asm/page.h>
> > >  #include <asm/asm_defns.h>
> > >  
> > > +/* XXX: Versatile Express specific code */
> > > +/* wake up secondary cpus */
> > > +.globl kick_cpus
> > > +kick_cpus:
> > 
> > My understanding was the mode_switch.S was intended to be a place to
> > hold the hacks and fixups required because there is no firmware on the
> > models and/or to address short comings in the firmware. This kick
> > function is a normal/expected part of booting on a vexpress so I
don''t
> > think it especially belongs here.
> 
> I have created a processor.S file for processor specific initializations
> (see ACTLR), maybe I can move it there.
proc-ca15.S perhaps? So we can add proc-exynos.S etc in the future?

Ian

Tim Deegan

2012-Oct-26 08:56 UTC

head link

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

At 13:04 +0100 on 25 Oct (1351170270), Ian Campbell
wrote:> On Thu, 2012-10-25 at 12:57 +0100, Stefano Stabellini wrote:
> > On Thu, 25 Oct 2012, Ian Campbell wrote:
> > > On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > > > From the Cortex A15 manual:
> > > > 
> > > > "Enables the processor to receive instruction cache,
BTB, and TLB maintenance
> > > > operations from other processors"
> > > > 
> > > > ...
> > > > 
> > > > "You must set this bit before enabling the caches and
MMU, or
> > > > performing any cache and TLB maintenance operations. The
only time
> > > > you must clear this bit is during a processor power-down
sequence"
> > > 
> > > Is it considered a bug that the firmware doesn''t do
this?
> > 
> > Why would it be? You can run fairly complicated pieces of software
> > without caches or MMU.
> 
> True, I guess I just considered that not setting the SMP bit on the
> second processor when the firmware starts it seemed a bit odd. If you
> aren''t caches/MMU/etc them then setting the bit is pretty much a
NOP (or
> maybe it isn''t?).
Well, given that you''re not allowed to clear it except at power-down, I
think it would be impolite of the firmware to set it if there''s _any_
reason the OS might not want it set.

Tim.

Ian Campbell

2012-Oct-26 08:59 UTC

head link

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

On Fri, 2012-10-26 at 09:56 +0100, Tim Deegan wrote:> At 13:04 +0100 on 25 Oct (1351170270), Ian Campbell wrote:
> > On Thu, 2012-10-25 at 12:57 +0100, Stefano Stabellini wrote:
> > > On Thu, 25 Oct 2012, Ian Campbell wrote:
> > > > On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > > > > From the Cortex A15 manual:
> > > > > 
> > > > > "Enables the processor to receive instruction
cache, BTB, and TLB maintenance
> > > > > operations from other processors"
> > > > > 
> > > > > ...
> > > > > 
> > > > > "You must set this bit before enabling the caches
and MMU, or
> > > > > performing any cache and TLB maintenance operations.
The only time
> > > > > you must clear this bit is during a processor
power-down sequence"
> > > > 
> > > > Is it considered a bug that the firmware doesn''t do
this?
> > > 
> > > Why would it be? You can run fairly complicated pieces of
software
> > > without caches or MMU.
> > 
> > True, I guess I just considered that not setting the SMP bit on the
> > second processor when the firmware starts it seemed a bit odd. If you
> > aren''t caches/MMU/etc them then setting the bit is pretty
much a NOP (or
> > maybe it isn''t?).
> 
> Well, given that you''re not allowed to clear it except at
power-down, I
> think it would be impolite of the firmware to set it if there''s
_any_
> reason the OS might not want it set.
True.

Tim Deegan

2012-Oct-26 09:01 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 18:35 +0100 on 24 Oct (1351103740), Stefano Stabellini
wrote:> > I don''t think this is necessary - why not just pass va
directly to the
> > inline asm?  We don''t care what register it''s in
(and if we did I''m not
> > convinced this would guarantee it was r0).
> > 
> > > +    asm volatile (
> > > +        "dsb;"
> > > +        STORE_CP32(0, DCCMVAC)
> > > +        "isb;"
> > > +        : : "r" (r0) : "memory");
> > 
> > Does this need a ''memory'' clobber?  Can we get away
with just saying it
> > consumes *va as an input?  All we need to be sure of is that the
> > particular thing we''re flushing has been written out; no need
to stop
> > any other optimizations.
> 
> you are right on both points
> 
> > I guess it might need to be re-cast as a macro so the compiler knows
how
> > big *va is?
> 
> I don''t think it is necessary, after all the size of a register
has to
> be the same of a virtual address
But it''s the size of the thing in memory that''s being flushed
that
matters, not the size of the pointer to it!  E.g. after a PTE write we
need a 64-bit memory input operand to stop the compiler from hoisting
any part of the PTE write past the cache flush. (well OK we explicitly use
a 64-bit atomic write for PTE writes, but YKWIM).

Tim.

Stefano Stabellini

2012-Oct-26 11:18 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Fri, 26 Oct 2012, Ian Campbell wrote:> On Thu, 2012-10-25 at 18:45 +0100, Stefano Stabellini wrote:
> > > >  
> > > >  boot_cpu:
> > > > @@ -98,6 +98,10 @@ boot_cpu:
> > > >  	PRINT(" booting -\r\n")
> > > >  #endif
> > > >  
> > > > +	/* Wake up secondary cpus */
> > > > +	teq   r12, #0
> > > > +	bleq  kick_cpus
> > > 
> > > Does this have to be done this early? Couldn''t we defer
it to C land
> > > where it would be easier to isolate the processor/platform
specific
> > > behaviour?
> > 
> > Yes, it does because we need to send an interrupt to cpus running in
> > secure mode, so this has to happen before we drop off secure state and
we
> > enter hypervisor state.
> 
> Hrm, so maybe this stuff does belong in mode_switch.S after all?
> 
> Or is there perhaps some register (e.g. in the GIC?) which would allow
> non-secure hyp mode to sent an event to a processor in secure monitor
> mode?
Whether the target processor in secure mode receives the interrupt or
not, depends only on the GIC configuration on the target processor.
I don''t think there is anything we can do from cpu0 in non-secure mode.

> Or are secondary CPUs actually spinning in secure supervisor mode?
Yes.

> I guess this works in Linux because the boot CPU is in *secure* kernel
> mode and that is allowed to send events to other secure modes?
That''s a
> further argument that this is related to the firmware not bringing us up
> in Hyp / NS mode and therefore that the fix should be in mode_switch.S.
That''s right.

> > I have created a processor.S file for processor specific
initializations
> > (see ACTLR), maybe I can move it there.
> 
> proc-ca15.S perhaps? So we can add proc-exynos.S etc in the future? 
OK

Ian Campbell

2012-Oct-26 12:16 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Fri, 2012-10-26 at 12:18 +0100, Stefano Stabellini
wrote:> On Fri, 26 Oct 2012, Ian Campbell wrote:
> > On Thu, 2012-10-25 at 18:45 +0100, Stefano Stabellini wrote:
> > > > >  
> > > > >  boot_cpu:
> > > > > @@ -98,6 +98,10 @@ boot_cpu:
> > > > >  	PRINT(" booting -\r\n")
> > > > >  #endif
> > > > >  
> > > > > +	/* Wake up secondary cpus */
> > > > > +	teq   r12, #0
> > > > > +	bleq  kick_cpus
> > > > 
> > > > Does this have to be done this early? Couldn''t we
defer it to C land
> > > > where it would be easier to isolate the processor/platform
specific
> > > > behaviour?
> > > 
> > > Yes, it does because we need to send an interrupt to cpus running
in
> > > secure mode, so this has to happen before we drop off secure
state and we
> > > enter hypervisor state.
> > 
> > Hrm, so maybe this stuff does belong in mode_switch.S after all?
> > 
> > Or is there perhaps some register (e.g. in the GIC?) which would allow
> > non-secure hyp mode to sent an event to a processor in secure monitor
> > mode?
> 
> Whether the target processor in secure mode receives the interrupt or
> not, depends only on the GIC configuration on the target processor.
> I don''t think there is anything we can do from cpu0 in non-secure
mode.
Isn''t this controlled by the GICD_GROUP register, which can be set from
cpu0 but affects all cpus?

Or is it the case that we need to do this poke before mode_switch.S
makes all interrupts into grp1 interrupts?

Ian.

Stefano Stabellini

2012-Oct-26 15:24 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Fri, 26 Oct 2012, Ian Campbell wrote:> On Fri, 2012-10-26 at 12:18 +0100, Stefano Stabellini wrote:
> > On Fri, 26 Oct 2012, Ian Campbell wrote:
> > > On Thu, 2012-10-25 at 18:45 +0100, Stefano Stabellini wrote:
> > > > > >  
> > > > > >  boot_cpu:
> > > > > > @@ -98,6 +98,10 @@ boot_cpu:
> > > > > >  	PRINT(" booting -\r\n")
> > > > > >  #endif
> > > > > >  
> > > > > > +	/* Wake up secondary cpus */
> > > > > > +	teq   r12, #0
> > > > > > +	bleq  kick_cpus
> > > > > 
> > > > > Does this have to be done this early? Couldn''t
we defer it to C land
> > > > > where it would be easier to isolate the
processor/platform specific
> > > > > behaviour?
> > > > 
> > > > Yes, it does because we need to send an interrupt to cpus
running in
> > > > secure mode, so this has to happen before we drop off secure
state and we
> > > > enter hypervisor state.
> > > 
> > > Hrm, so maybe this stuff does belong in mode_switch.S after all?
> > > 
> > > Or is there perhaps some register (e.g. in the GIC?) which would
allow
> > > non-secure hyp mode to sent an event to a processor in secure
monitor
> > > mode?
> > 
> > Whether the target processor in secure mode receives the interrupt or
> > not, depends only on the GIC configuration on the target processor.
> > I don''t think there is anything we can do from cpu0 in
non-secure mode.
> 
> Isn''t this controlled by the GICD_GROUP register, which can be set
from
> cpu0 but affects all cpus?
GICD_GROUP0 is banked

> Or is it the case that we need to do this poke before mode_switch.S
> makes all interrupts into grp1 interrupts?
I don''t think we can do that, it is the receiver cpu that would need to
do it

Ian Campbell

2012-Oct-26 15:32 UTC

head link

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

On Fri, 2012-10-26 at 16:24 +0100, Stefano Stabellini
wrote:> On Fri, 26 Oct 2012, Ian Campbell wrote:
> > On Fri, 2012-10-26 at 12:18 +0100, Stefano Stabellini wrote:
> > > On Fri, 26 Oct 2012, Ian Campbell wrote:
> > > > On Thu, 2012-10-25 at 18:45 +0100, Stefano Stabellini wrote:
> > > > > > >  
> > > > > > >  boot_cpu:
> > > > > > > @@ -98,6 +98,10 @@ boot_cpu:
> > > > > > >  	PRINT(" booting -\r\n")
> > > > > > >  #endif
> > > > > > >  
> > > > > > > +	/* Wake up secondary cpus */
> > > > > > > +	teq   r12, #0
> > > > > > > +	bleq  kick_cpus
> > > > > > 
> > > > > > Does this have to be done this early?
Couldn''t we defer it to C land
> > > > > > where it would be easier to isolate the
processor/platform specific
> > > > > > behaviour?
> > > > > 
> > > > > Yes, it does because we need to send an interrupt to
cpus running in
> > > > > secure mode, so this has to happen before we drop off
secure state and we
> > > > > enter hypervisor state.
> > > > 
> > > > Hrm, so maybe this stuff does belong in mode_switch.S after
all?
> > > > 
> > > > Or is there perhaps some register (e.g. in the GIC?) which
would allow
> > > > non-secure hyp mode to sent an event to a processor in
secure monitor
> > > > mode?
> > > 
> > > Whether the target processor in secure mode receives the
interrupt or
> > > not, depends only on the GIC configuration on the target
processor.
> > > I don''t think there is anything we can do from cpu0 in
non-secure mode.
> > 
> > Isn''t this controlled by the GICD_GROUP register, which can
be set from
> > cpu0 but affects all cpus?
> 
> GICD_GROUP0 is banked
Damn, right.
> 
> 
> > Or is it the case that we need to do this poke before mode_switch.S
> > makes all interrupts into grp1 interrupts?
> 
> I don''t think we can do that, it is the receiver cpu that would
need to
> do it

Stefano Stabellini

2012-Oct-26 15:53 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Fri, 26 Oct 2012, Tim Deegan wrote:> At 18:35 +0100 on 24 Oct (1351103740), Stefano Stabellini wrote:
> > > I don''t think this is necessary - why not just pass va
directly to the
> > > inline asm?  We don''t care what register it''s
in (and if we did I''m not
> > > convinced this would guarantee it was r0).
> > > 
> > > > +    asm volatile (
> > > > +        "dsb;"
> > > > +        STORE_CP32(0, DCCMVAC)
> > > > +        "isb;"
> > > > +        : : "r" (r0) : "memory");
> > > 
> > > Does this need a ''memory'' clobber?  Can we get
away with just saying it
> > > consumes *va as an input?  All we need to be sure of is that the
> > > particular thing we''re flushing has been written out; no
need to stop
> > > any other optimizations.
> > 
> > you are right on both points
> > 
> > > I guess it might need to be re-cast as a macro so the compiler
knows how
> > > big *va is?
> > 
> > I don''t think it is necessary, after all the size of a
register has to
> > be the same of a virtual address
> 
> But it''s the size of the thing in memory that''s being
flushed that
> matters, not the size of the pointer to it!
>
> E.g. after a PTE write we
> need a 64-bit memory input operand to stop the compiler from hoisting
> any part of the PTE write past the cache flush. (well OK we explicitly use
> a 64-bit atomic write for PTE writes, but YKWIM).
The implementation of write_pte is entirely in assembly so I doubt that
the compiler is going to reorder it.

However I see your point in case of flush_xen_dcache_va.
Wouldn''t a barrier() at the beginning of the function be enough?

Stefano Stabellini

2012-Oct-26 15:55 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Fri, 26 Oct 2012, Stefano Stabellini wrote:> On Fri, 26 Oct 2012, Tim Deegan wrote:
> > At 18:35 +0100 on 24 Oct (1351103740), Stefano Stabellini wrote:
> > > > I don''t think this is necessary - why not just pass
va directly to the
> > > > inline asm?  We don''t care what register
it''s in (and if we did I''m not
> > > > convinced this would guarantee it was r0).
> > > > 
> > > > > +    asm volatile (
> > > > > +        "dsb;"
> > > > > +        STORE_CP32(0, DCCMVAC)
> > > > > +        "isb;"
> > > > > +        : : "r" (r0) : "memory");
> > > > 
> > > > Does this need a ''memory'' clobber?  Can we
get away with just saying it
> > > > consumes *va as an input?  All we need to be sure of is that
the
> > > > particular thing we''re flushing has been written
out; no need to stop
> > > > any other optimizations.
> > > 
> > > you are right on both points
> > > 
> > > > I guess it might need to be re-cast as a macro so the
compiler knows how
> > > > big *va is?
> > > 
> > > I don''t think it is necessary, after all the size of a
register has to
> > > be the same of a virtual address
> > 
> > But it''s the size of the thing in memory that''s
being flushed that
> > matters, not the size of the pointer to it!
> >
> > E.g. after a PTE write we
> > need a 64-bit memory input operand to stop the compiler from hoisting
> > any part of the PTE write past the cache flush. (well OK we explicitly
use
> > a 64-bit atomic write for PTE writes, but YKWIM).
> 
> The implementation of write_pte is entirely in assembly so I doubt that
> the compiler is going to reorder it.
> 
> However I see your point in case of flush_xen_dcache_va.
> Wouldn''t a barrier() at the beginning of the function be enough?
>
Actually we already have a memory clobber in flush_xen_dcache_va,
shouldn''t that prevent the compiler from reordering instructions?

Stefano Stabellini

2012-Oct-26 16:03 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Fri, 26 Oct 2012, Stefano Stabellini wrote:> On Fri, 26 Oct 2012, Stefano Stabellini wrote:
> > On Fri, 26 Oct 2012, Tim Deegan wrote:
> > > At 18:35 +0100 on 24 Oct (1351103740), Stefano Stabellini wrote:
> > > > > I don''t think this is necessary - why not just
pass va directly to the
> > > > > inline asm?  We don''t care what register
it''s in (and if we did I''m not
> > > > > convinced this would guarantee it was r0).
> > > > > 
> > > > > > +    asm volatile (
> > > > > > +        "dsb;"
> > > > > > +        STORE_CP32(0, DCCMVAC)
> > > > > > +        "isb;"
> > > > > > +        : : "r" (r0) :
"memory");
> > > > > 
> > > > > Does this need a ''memory'' clobber? 
Can we get away with just saying it
> > > > > consumes *va as an input?  All we need to be sure of is
that the
> > > > > particular thing we''re flushing has been
written out; no need to stop
> > > > > any other optimizations.
> > > > 
> > > > you are right on both points
> > > > 
> > > > > I guess it might need to be re-cast as a macro so the
compiler knows how
> > > > > big *va is?
> > > > 
> > > > I don''t think it is necessary, after all the size
of a register has to
> > > > be the same of a virtual address
> > > 
> > > But it''s the size of the thing in memory that''s
being flushed that
> > > matters, not the size of the pointer to it!
> > >
> > > E.g. after a PTE write we
> > > need a 64-bit memory input operand to stop the compiler from
hoisting
> > > any part of the PTE write past the cache flush. (well OK we
explicitly use
> > > a 64-bit atomic write for PTE writes, but YKWIM).
> > 
> > The implementation of write_pte is entirely in assembly so I doubt
that
> > the compiler is going to reorder it.
> > 
> > However I see your point in case of flush_xen_dcache_va.
> > Wouldn''t a barrier() at the beginning of the function be
enough?
> >
> 
> Actually we already have a memory clobber in flush_xen_dcache_va,
> shouldn''t that prevent the compiler from reordering instructions?
> 
After reading again the entire thread I think that I understand what you
meant: can''t we get rid of the memory clobber and specify *va as input?

However in order to do that correctly we need to make clear how big *va is
otherwise the compiler could reorder things.

The problem is that we don''t know at compile time how big a cacheline
is, so I think that we need to keep the memory clobber.

Stefano Stabellini

2012-Oct-26 16:33 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Thu, 25 Oct 2012, Ian Campbell wrote:> On Wed, 2012-10-24 at 16:03 +0100, Stefano Stabellini wrote:
> > Each rank holds 32 irqs, so we should divide by 32 rather than by 4.
> 
> I think this is wrong, because idx isn''t used in the way your
reasoning
> implies, when we use it we assume 8 bits-per-interrupt in the register
> not 1.
> 
> The accessor function vgix_irq_rank is couched in terms of
> GICD_<FOO><n>, which is convenient where we are emulating
register
> accesses but is very confusing in this one function where we
aren''t
> thinking in terms of registers!
> 
> vgic_irq_rank(v, 8, idx) is saying "find me the rank containing
> GICD_<FOO><idx> where FOO has 8 bits per interrupt". Since
4 lots of 8
> bits goes into 32 we therefore need to divide by 4. This makes sense if
> you consider that FOO here is PRIORITY and that GICD_PRIORITY0 controls
> irq 0..3, GICD_PRIORITY1 irq 4..7 etc.
> 
> With idx = irq >> 2 you get:
> 
> IRQ00: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
0
> IRQ01: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
1
> IRQ02: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
2
> IRQ03: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
3
> IRQ04: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE =
0
> IRQ05: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE =
1
> [...]
> IRQ30: idx = 07 REG_RANK_NR(08, 07) = 0, REG_RANK_INDEX(08, 07) = 7, BYTE =
2
> IRQ31: idx = 07 REG_RANK_NR(08, 07) = 0, REG_RANK_INDEX(08, 07) = 7, BYTE =
3
> IRQ32: idx = 08 REG_RANK_NR(08, 08) = 1, REG_RANK_INDEX(08, 08) = 0, BYTE =
0
> IRQ33: idx = 08 REG_RANK_NR(08, 08) = 1, REG_RANK_INDEX(08, 08) = 0, BYTE =
1
> [...]
> IRQ62: idx = 15 REG_RANK_NR(08, 15) = 1, REG_RANK_INDEX(08, 15) = 7, BYTE =
2
> IRQ63: idx = 15 REG_RANK_NR(08, 15) = 1, REG_RANK_INDEX(08, 15) = 7, BYTE =
3
> IRQ64: idx = 16 REG_RANK_NR(08, 16) = 2, REG_RANK_INDEX(08, 16) = 0, BYTE =
0
> IRQ65: idx = 16 REG_RANK_NR(08, 16) = 2, REG_RANK_INDEX(08, 16) = 0, BYTE =
1
> [...]
> 
> IOW interrupts 0..31 are in rank 0, 32..63 are rank 1 etc, which is correct
> 
> With idx = irq / 32 you instead get:
> 
> IRQ00: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
0
> IRQ01: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
1
> IRQ02: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
2
> IRQ03: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
3
> IRQ04: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
0
> IRQ05: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
1
> [...]
> IRQ30: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
2
> IRQ31: idx = 00 REG_RANK_NR(08, 00) = 0, REG_RANK_INDEX(08, 00) = 0, BYTE =
3
> IRQ32: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE =
0
> IRQ33: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE =
1
> [...]
> IRQ62: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE =
2
> IRQ63: idx = 01 REG_RANK_NR(08, 01) = 0, REG_RANK_INDEX(08, 01) = 1, BYTE =
3
> IRQ64: idx = 02 REG_RANK_NR(08, 02) = 0, REG_RANK_INDEX(08, 02) = 2, BYTE =
0
> IRQ65: idx = 02 REG_RANK_NR(08, 02) = 0, REG_RANK_INDEX(08, 02) = 2, BYTE =
1
> [...]
> IRQ255: idx = 07 REG_RANK_NR(08, 07) = 0, REG_RANK_INDEX(08, 07) = 7, BYTE
= 3
> IRQ256: idx = 08 REG_RANK_NR(08, 08) = 1, REG_RANK_INDEX(08, 08) = 0, BYTE
= 0
> 
> Here allinterrupts up to 255 are in rank 0, which is wrong.
> 
> For idx = irq / 32 you would need to use REG_RANK_NR(1, idx) not (8,
> idx). (and you''d need to trivially fix REG_RANK_NR to handle b ==
1).
> 
> Perhaps the way to think of it is not as "/ 32" but rather as
">> 5".
> Since REG_RANK_NR(8, ) is effectively >> 3 when combined with the
> existing >> 2 we get >> 5 overall.
> 
> If you change it as you have done then you are doing >> 5 *and*
>> 3,
> i.e. >> 8 aka "/ 256" -- which explains why interrupts up
to 255 end up
> in rank 0 with your change.
Your explanation is sounds, but the fact remains that the code doesn''t
work as is :)
It means that this patch hides the bug rather than fixing it...

Ian Campbell

2012-Oct-26 16:40 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Fri, 2012-10-26 at 17:33 +0100, Stefano Stabellini wrote:
> Your explanation is sounds, but the fact remains that the code
doesn''t
> work as is :)
;-)
> It means that this patch hides the bug rather than fixing it...
What are the symptoms?

BTW I noticed that "byte = irq & 0x3;" is redundant since
byte_read also
has the & in it. But that won''t cause problems.

Ian.

Tim Deegan

2012-Oct-26 16:45 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 16:53 +0100 on 26 Oct (1351270394), Stefano Stabellini
wrote:> On Fri, 26 Oct 2012, Tim Deegan wrote:
> > At 18:35 +0100 on 24 Oct (1351103740), Stefano Stabellini wrote:
> > > > I don''t think this is necessary - why not just pass
va directly to the
> > > > inline asm?  We don''t care what register
it''s in (and if we did I''m not
> > > > convinced this would guarantee it was r0).
> > > > 
> > > > > +    asm volatile (
> > > > > +        "dsb;"
> > > > > +        STORE_CP32(0, DCCMVAC)
> > > > > +        "isb;"
> > > > > +        : : "r" (r0) : "memory");
> > > > 
> > > > Does this need a ''memory'' clobber?  Can we
get away with just saying it
> > > > consumes *va as an input?  All we need to be sure of is that
the
> > > > particular thing we''re flushing has been written
out; no need to stop
> > > > any other optimizations.
> > > 
> > > you are right on both points
> > > 
> > > > I guess it might need to be re-cast as a macro so the
compiler knows how
> > > > big *va is?
> > > 
> > > I don''t think it is necessary, after all the size of a
register has to
> > > be the same of a virtual address
> > 
> > But it''s the size of the thing in memory that''s
being flushed that
> > matters, not the size of the pointer to it!
> >
> > E.g. after a PTE write we
> > need a 64-bit memory input operand to stop the compiler from hoisting
> > any part of the PTE write past the cache flush. (well OK we explicitly
use
> > a 64-bit atomic write for PTE writes, but YKWIM).
> 
> The implementation of write_pte is entirely in assembly so I doubt that
> the compiler is going to reorder it.
Augh!  Yes, like I said, PTE writes are fine.
> However I see your point in case of flush_xen_dcache_va.
> Wouldn''t a barrier() at the beginning of the function be enough?
More than enough.  That would be exactly equivalent to the "memory"
clobber above.  What I''m arguing for is a _less_ restrictive
constraint,
that only restricts delaying writes, and only affects the thing actually
being flushed (whatever size that is).

For larger regions we should have a function with a single barrier at
the top and then a loop of DCCMVAC writes.  For single objects smaller
than a cacheline we need to pass the object itself as a memory input
operand.  Probably we should also have a compile-time check that the
object is smaller than the smallest supported cache-line (i.e. one
DCCMVAC is enough).

Tim.

Tim Deegan

2012-Oct-26 16:55 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 17:03 +0100 on 26 Oct (1351271018), Stefano Stabellini
wrote:> On Fri, 26 Oct 2012, Stefano Stabellini wrote:
> > On Fri, 26 Oct 2012, Stefano Stabellini wrote:
> > > On Fri, 26 Oct 2012, Tim Deegan wrote:
> > > > At 18:35 +0100 on 24 Oct (1351103740), Stefano Stabellini
wrote:
> > > > > > I don''t think this is necessary - why not
just pass va directly to the
> > > > > > inline asm?  We don''t care what register
it''s in (and if we did I''m not
> > > > > > convinced this would guarantee it was r0).
> > > > > > 
> > > > > > > +    asm volatile (
> > > > > > > +        "dsb;"
> > > > > > > +        STORE_CP32(0, DCCMVAC)
> > > > > > > +        "isb;"
> > > > > > > +        : : "r" (r0) :
"memory");
> > > > > > 
> > > > > > Does this need a ''memory''
clobber?  Can we get away with just saying it
> > > > > > consumes *va as an input?  All we need to be sure
of is that the
> > > > > > particular thing we''re flushing has been
written out; no need to stop
> > > > > > any other optimizations.
> > > > > 
> > > > > you are right on both points
> > > > > 
> > > > > > I guess it might need to be re-cast as a macro so
the compiler knows how
> > > > > > big *va is?
> > > > > 
> > > > > I don''t think it is necessary, after all the
size of a register has to
> > > > > be the same of a virtual address
> > > > 
> > > > But it''s the size of the thing in memory
that''s being flushed that
> > > > matters, not the size of the pointer to it!
> > > >
> > > > E.g. after a PTE write we
> > > > need a 64-bit memory input operand to stop the compiler from
hoisting
> > > > any part of the PTE write past the cache flush. (well OK we
explicitly use
> > > > a 64-bit atomic write for PTE writes, but YKWIM).
> > > 
> > > The implementation of write_pte is entirely in assembly so I
doubt that
> > > the compiler is going to reorder it.
> > > 
> > > However I see your point in case of flush_xen_dcache_va.
> > > Wouldn''t a barrier() at the beginning of the function be
enough?
> > >
> > 
> > Actually we already have a memory clobber in flush_xen_dcache_va,
> > shouldn''t that prevent the compiler from reordering
instructions?
> > 
> 
> After reading again the entire thread I think that I understand what you
> meant: can''t we get rid of the memory clobber and specify *va as
input?
> 
> However in order to do that correctly we need to make clear how big *va is
> otherwise the compiler could reorder things.
Yes!
> The problem is that we don''t know at compile time how big a
cacheline
> is, so I think that we need to keep the memory clobber.
No!  It''s always safe to flush the entire line -- regardless of what
other writes might be happening to it.  After all, the cache controller
might evict that line on its own at any time, so nothing can be relying
on its _not_ being flushed.

The only problem with not knowing how big a cacheline is is this: if the
object is _bigger_ than a cache line we might use one DCCMVAC where more
than one is needed.  We can avoid that by a compile-time check on some
sort of architectural minimum cacheline size.

Tim.

Stefano Stabellini

2012-Oct-26 18:40 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Fri, 26 Oct 2012, Tim Deegan wrote:> At 17:03 +0100 on 26 Oct (1351271018), Stefano Stabellini wrote:
> > On Fri, 26 Oct 2012, Stefano Stabellini wrote:
> > > On Fri, 26 Oct 2012, Stefano Stabellini wrote:
> > > > On Fri, 26 Oct 2012, Tim Deegan wrote:
> > > > > At 18:35 +0100 on 24 Oct (1351103740), Stefano
Stabellini wrote:
> > > > > > > I don''t think this is necessary -
why not just pass va directly to the
> > > > > > > inline asm?  We don''t care what
register it''s in (and if we did I''m not
> > > > > > > convinced this would guarantee it was r0).
> > > > > > > 
> > > > > > > > +    asm volatile (
> > > > > > > > +        "dsb;"
> > > > > > > > +        STORE_CP32(0, DCCMVAC)
> > > > > > > > +        "isb;"
> > > > > > > > +        : : "r" (r0) :
"memory");
> > > > > > > 
> > > > > > > Does this need a ''memory''
clobber?  Can we get away with just saying it
> > > > > > > consumes *va as an input?  All we need to be
sure of is that the
> > > > > > > particular thing we''re flushing has
been written out; no need to stop
> > > > > > > any other optimizations.
> > > > > > 
> > > > > > you are right on both points
> > > > > > 
> > > > > > > I guess it might need to be re-cast as a
macro so the compiler knows how
> > > > > > > big *va is?
> > > > > > 
> > > > > > I don''t think it is necessary, after all
the size of a register has to
> > > > > > be the same of a virtual address
> > > > > 
> > > > > But it''s the size of the thing in memory
that''s being flushed that
> > > > > matters, not the size of the pointer to it!
> > > > >
> > > > > E.g. after a PTE write we
> > > > > need a 64-bit memory input operand to stop the compiler
from hoisting
> > > > > any part of the PTE write past the cache flush. (well
OK we explicitly use
> > > > > a 64-bit atomic write for PTE writes, but YKWIM).
> > > > 
> > > > The implementation of write_pte is entirely in assembly so I
doubt that
> > > > the compiler is going to reorder it.
> > > > 
> > > > However I see your point in case of flush_xen_dcache_va.
> > > > Wouldn''t a barrier() at the beginning of the
function be enough?
> > > >
> > > 
> > > Actually we already have a memory clobber in flush_xen_dcache_va,
> > > shouldn''t that prevent the compiler from reordering
instructions?
> > > 
> > 
> > After reading again the entire thread I think that I understand what
you
> > meant: can''t we get rid of the memory clobber and specify *va
as input?
> > 
> > However in order to do that correctly we need to make clear how big
*va is
> > otherwise the compiler could reorder things.
> 
> Yes!
> 
> > The problem is that we don''t know at compile time how big a
cacheline
> > is, so I think that we need to keep the memory clobber.
> 
> No!  It''s always safe to flush the entire line -- regardless of
what
> other writes might be happening to it.  After all, the cache controller
> might evict that line on its own at any time, so nothing can be relying
> on its _not_ being flushed.
> 
> The only problem with not knowing how big a cacheline is is this: if the
> object is _bigger_ than a cache line we might use one DCCMVAC where more
> than one is needed.  We can avoid that by a compile-time check on some
> sort of architectural minimum cacheline size. 
If we want to do that then we need to pass a size argument to
flush_xen_dcache_va and have a loop in the function, each iteration
flushing a cacheline:


static inline void flush_xen_dcache_va(void *p, unsigned long size)
{
    unsigned long cacheline_size = READ_CP32(CCSIDR);
    unsigned long i;
    for (i = 0; i < size; i += cacheline_size, p += cacheline_size) {
        asm volatile (
            "dsb;"
            STORE_CP32(0, DCCMVAC)
            "dsb;"
            : : "r" (p));
    }
}


Even though I think that it is really unlikely that the compiler moves
the writes after the loop, in theory we would still need to specify the
correct size of the input parameter to the inline assembly.

Is there a better way?

Stefano Stabellini

2012-Oct-26 18:42 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Fri, 26 Oct 2012, Ian Campbell wrote:> > It means that this patch hides the bug rather than fixing it...
> 
> What are the symptoms?
> 
> BTW I noticed that "byte = irq & 0x3;" is redundant since
byte_read also
> has the & in it. But that won''t cause problems.
The ienable bit is wrong for irq > 32.

I think that the problem is the usage of vgic_irq_rank with registers
that have 1 bit per interrupt.
In fact the following patch fixes the issue.

---


xen/arm: pass the correct bit-per-interrupt argument to vgic_irq_rank

Use 1 for registers that have 1 bit per irq.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 3c3983f..92731b6 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -42,13 +42,7 @@
  */
 static inline int REG_RANK_NR(int b, uint32_t n)
 {
-    switch ( b )
-    {
-    case 8: return n >> 3;
-    case 4: return n >> 2;
-    case 2: return n >> 1;
-    default: BUG();
-    }
+    return n / b;
 }
 
 /*
@@ -199,7 +193,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ISENABLER ... GICD_ISENABLERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ISENABLER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ISENABLER);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = rank->ienable;
@@ -208,7 +202,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ICENABLER ... GICD_ICENABLERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ICENABLER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ICENABLER);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = rank->ienable;
@@ -217,7 +211,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ISPENDR ... GICD_ISPENDRN:
         if ( dabt.size != 0 && dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ISPENDR);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ISPENDR);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = byte_read(rank->ipend, dabt.sign, offset);
@@ -226,7 +220,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ICPENDR ... GICD_ICPENDRN:
         if ( dabt.size != 0 && dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ICPENDR);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ICPENDR);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = byte_read(rank->ipend, dabt.sign, offset);
@@ -235,7 +229,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ISACTIVER ... GICD_ISACTIVERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ISACTIVER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ISACTIVER);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = rank->iactive;
@@ -244,7 +238,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ICACTIVER ... GICD_ICACTIVERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ICACTIVER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ICACTIVER);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = rank->iactive;
@@ -296,7 +290,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_CPENDSGIR ... GICD_CPENDSGIRN:
         if ( dabt.size != 0 && dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_CPENDSGIR);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_CPENDSGIR);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = byte_read(rank->pendsgi, dabt.sign, offset);
@@ -305,7 +299,7 @@ static int vgic_distr_mmio_read(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_SPENDSGIR ... GICD_SPENDSGIRN:
         if ( dabt.size != 0 && dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_SPENDSGIR);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_SPENDSGIR);
         if ( rank == NULL) goto read_as_zero;
         vgic_lock_rank(v, rank);
         *r = byte_read(rank->pendsgi, dabt.sign, offset);
@@ -400,7 +394,7 @@ static int vgic_distr_mmio_write(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ISENABLER ... GICD_ISENABLERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ISENABLER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ISENABLER);
         if ( rank == NULL) goto write_ignore;
         vgic_lock_rank(v, rank);
         tr = rank->ienable;
@@ -411,7 +405,7 @@ static int vgic_distr_mmio_write(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ICENABLER ... GICD_ICENABLERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ICENABLER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ICENABLER);
         if ( rank == NULL) goto write_ignore;
         vgic_lock_rank(v, rank);
         rank->ienable &= ~*r;
@@ -432,7 +426,7 @@ static int vgic_distr_mmio_write(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ISACTIVER ... GICD_ISACTIVERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ISACTIVER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ISACTIVER);
         if ( rank == NULL) goto write_ignore;
         vgic_lock_rank(v, rank);
         rank->iactive &= ~*r;
@@ -441,7 +435,7 @@ static int vgic_distr_mmio_write(struct vcpu *v, mmio_info_t
*info)
 
     case GICD_ICACTIVER ... GICD_ICACTIVERN:
         if ( dabt.size != 2 ) goto bad_width;
-        rank = vgic_irq_rank(v, 8, gicd_reg - GICD_ICACTIVER);
+        rank = vgic_irq_rank(v, 1, gicd_reg - GICD_ICACTIVER);
         if ( rank == NULL) goto write_ignore;
         vgic_lock_rank(v, rank);
         rank->iactive &= ~*r;
@@ -577,9 +571,9 @@ struct pending_irq *irq_to_pending(struct vcpu *v, unsigned
int irq)
 
 void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq, int virtual)
 {
-    int idx = irq >> 2, byte = irq & 0x3;
+    int byte = irq & 0x3;
     uint8_t priority;
-    struct vgic_irq_rank *rank = vgic_irq_rank(v, 8, idx);
+    struct vgic_irq_rank *rank = vgic_irq_rank(v, 1, irq / 32);
     struct pending_irq *iter, *n = irq_to_pending(v, irq);
     unsigned long flags;
 
@@ -587,7 +581,7 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq,
int virtual)
     if (!list_empty(&n->inflight))
         return;
 
-    priority = byte_read(rank->ipriority[REG_RANK_INDEX(8, idx)], 0, byte);
+    priority = byte_read(rank->ipriority[REG_RANK_INDEX(8, irq / 4)], 0,
byte);
 
     n->irq = irq;
     n->priority = priority;

Ian Campbell

2012-Oct-26 20:47 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Fri, 2012-10-26 at 19:42 +0100, Stefano Stabellini
wrote:> I think that the problem is the usage of vgic_irq_rank with registers
> that have 1 bit per interrupt.
That''s very plausible indeed.
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 3c3983f..92731b6 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -42,13 +42,7 @@
>   */
>  static inline int REG_RANK_NR(int b, uint32_t n)
>  {
> -    switch ( b )
> -    {
> -    case 8: return n >> 3;
> -    case 4: return n >> 2;
> -    case 2: return n >> 1;
> -    default: BUG();
> -    }
> +    return n / b;
All the infrastructure will fall apart if b isn''t a power of two,
that''s
why I used the switch. Can you just add the appropriate case 1 instead?
Probably the bug should be a call to an undefined function to make this
a compile time rather than runtime failure too.
> @@ -577,9 +571,9 @@ struct pending_irq *irq_to_pending(struct vcpu *v,
unsigned int irq)
>  
>  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq, int virtual)
The changes to this function are all unnecessary, since it was already
consistent within itself. If you want to change it please add a helper
which takes an irq and hides the shifts.

I''d be inclined to rename the existing vgic_irq_rank to vgic_reg_rank
and then call the new helper vgic_irq_rank since that would better
reflect the purpose of both.
>  {
> -    int idx = irq >> 2, byte = irq & 0x3;
> +    int byte = irq & 0x3;
byte isn''t actually needed either.
>      uint8_t priority;
> -    struct vgic_irq_rank *rank = vgic_irq_rank(v, 8, idx);
> +    struct vgic_irq_rank *rank = vgic_irq_rank(v, 1, irq / 32);
>      struct pending_irq *iter, *n = irq_to_pending(v, irq);
>      unsigned long flags;
>  
> @@ -587,7 +581,7 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int
irq, int virtual)
>      if (!list_empty(&n->inflight))
>          return;
>  
> -    priority = byte_read(rank->ipriority[REG_RANK_INDEX(8, idx)], 0,
byte);
> +    priority = byte_read(rank->ipriority[REG_RANK_INDEX(8, irq / 4)],
0, byte);
>  
>      n->irq = irq;
>      n->priority = priority;

Tim Deegan

2012-Oct-27 10:09 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

At 21:47 +0100 on 26 Oct (1351288040), Ian Campbell
wrote:> On Fri, 2012-10-26 at 19:42 +0100, Stefano Stabellini wrote:
> > I think that the problem is the usage of vgic_irq_rank with registers
> > that have 1 bit per interrupt.
> 
> That''s very plausible indeed.
> 
> > diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> > index 3c3983f..92731b6 100644
> > --- a/xen/arch/arm/vgic.c
> > +++ b/xen/arch/arm/vgic.c
> > @@ -42,13 +42,7 @@
> >   */
> >  static inline int REG_RANK_NR(int b, uint32_t n)
> >  {
> > -    switch ( b )
> > -    {
> > -    case 8: return n >> 3;
> > -    case 4: return n >> 2;
> > -    case 2: return n >> 1;
> > -    default: BUG();
> > -    }
> > +    return n / b;
> 
> All the infrastructure will fall apart if b isn''t a power of two,
that''s
> why I used the switch. Can you just add the appropriate case 1 instead?
> Probably the bug should be a call to an undefined function to make this
> a compile time rather than runtime failure too.
BUILD_BUG_ON((b & -b) != b); ?

Tim.

Tim Deegan

2012-Oct-27 10:44 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 19:40 +0100 on 26 Oct (1351280413), Stefano Stabellini
wrote:> > No!  It''s always safe to flush the entire line -- regardless
of what
> > other writes might be happening to it.  After all, the cache
controller
> > might evict that line on its own at any time, so nothing can be
relying
> > on its _not_ being flushed.
> > 
> > The only problem with not knowing how big a cacheline is is this: if
the
> > object is _bigger_ than a cache line we might use one DCCMVAC where
more
> > than one is needed.  We can avoid that by a compile-time check on some
> > sort of architectural minimum cacheline size.
>  
> If we want to do that then we need to pass a size argument to
> flush_xen_dcache_va and have a loop in the function, each iteration
> flushing a cacheline:
> 
> static inline void flush_xen_dcache_va(void *p, unsigned long size)
> {
>     unsigned long cacheline_size = READ_CP32(CCSIDR);
>     unsigned long i;
>     for (i = 0; i < size; i += cacheline_size, p += cacheline_size) {
>         asm volatile (
>             "dsb;"
>             STORE_CP32(0, DCCMVAC)
>             "dsb;"
>             : : "r" (p));
>     }
> }
I think we should have two functions.  One should look almost like that
and be for flushing large ranges, and one much simpler for flushing
small items.  Like this (totally untested, uncompiled even):

 #define MIN_CACHELINE_BYTES 32 // or whatever

 /* In setup.c somewhere. */
 if ( READ_CP32(CCSIDR) < MIN_CACHELINE_BYTES )
     panic("CPU has preposterously small cache lines");

 /* Function for flushing medium-sized areas.
  * if ''range'' is large enough we might want to use
model-specific
  * full-cache flushes. */
 static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
 {
     void *end;
     unsigned long cacheline_bytes = READ_CP32(CCSIDR);
     barrier();       /* So the compiler issues all writes to the range */
     dsb();           /* So the CPU issues all writes to the range */ 
     for ( end = p + size; p < end; p += cacheline_bytes )
         WRITE_CP32(DCCMVAC, p);
     dsb();           /* So we know the flushes happen before continuing */
 }

 /* Macro for flushing a single small item.  The predicate is always 
  * compile-time constant so this will compile down to 3 instructions in
  * the common case.  Make sure to call it with the correct type of
  * pointer! */
 #define flush_xen_dcache_va(p) do {                       \
     typeof(p) _p = (p);                                   \
     if ( (sizeof *_p) > MIN_CACHELINE_BYTES )             \
         flush_xen_dcache_va_range(_p, sizeof *_p);        \
     else                                                  \
         asm volatile (                                    \
             "dsb;"   /* Finish all earlier writes */      \
             STORE_CP32(0, DCCMVAC)                        \
             "dsb;"   /* Finish flush before continuing */ \
             : : "r" (_p), "m" (*_p));                     \
 } while (0)

What do you think?

Tim.

Ian Campbell

2012-Oct-27 10:44 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Sat, 2012-10-27 at 11:09 +0100, Tim Deegan wrote:> At 21:47 +0100 on 26 Oct (1351288040), Ian Campbell wrote:
> > All the infrastructure will fall apart if b isn''t a power of
two, that''s
> > why I used the switch. Can you just add the appropriate case 1
instead?
> > Probably the bug should be a call to an undefined function to make
this
> > a compile time rather than runtime failure too.
> 
> BUILD_BUG_ON((b & -b) != b); ?
Sure.

Ian.

Tim Deegan

2012-Oct-27 11:54 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 11:44 +0100 on 27 Oct (1351338268), Tim Deegan wrote:>  /* Function for flushing medium-sized areas.
>   * if ''range'' is large enough we might want to use
model-specific
>   * full-cache flushes. */
>  static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
>  {
>      void *end;
>      unsigned long cacheline_bytes = READ_CP32(CCSIDR);
>      barrier();       /* So the compiler issues all writes to the range */
>      dsb();           /* So the CPU issues all writes to the range */ 
Oh - I just noticed that the way we''ve defined dsb() it includes a
memory clobber.   So I guess we don''t need barrier() as well there.

We might want to look at the other users of dsb() and see if we want to
drop the memory clobber from it as well.  But OTOH we may be getting
way into premature optimization already. :)

Tim.

Stefano Stabellini

2012-Oct-29 09:52 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Sat, 27 Oct 2012, Tim Deegan wrote:> At 19:40 +0100 on 26 Oct (1351280413), Stefano Stabellini wrote:
> > > No!  It''s always safe to flush the entire line --
regardless of what
> > > other writes might be happening to it.  After all, the cache
controller
> > > might evict that line on its own at any time, so nothing can be
relying
> > > on its _not_ being flushed.
> > > 
> > > The only problem with not knowing how big a cacheline is is this:
if the
> > > object is _bigger_ than a cache line we might use one DCCMVAC
where more
> > > than one is needed.  We can avoid that by a compile-time check on
some
> > > sort of architectural minimum cacheline size.
> >  
> > If we want to do that then we need to pass a size argument to
> > flush_xen_dcache_va and have a loop in the function, each iteration
> > flushing a cacheline:
> > 
> > static inline void flush_xen_dcache_va(void *p, unsigned long size)
> > {
> >     unsigned long cacheline_size = READ_CP32(CCSIDR);
> >     unsigned long i;
> >     for (i = 0; i < size; i += cacheline_size, p += cacheline_size)
{
> >         asm volatile (
> >             "dsb;"
> >             STORE_CP32(0, DCCMVAC)
> >             "dsb;"
> >             : : "r" (p));
> >     }
> > }
> 
> I think we should have two functions.  One should look almost like that
> and be for flushing large ranges, and one much simpler for flushing
> small items.  Like this (totally untested, uncompiled even):
> 
>  #define MIN_CACHELINE_BYTES 32 // or whatever
> 
>  /* In setup.c somewhere. */
>  if ( READ_CP32(CCSIDR) < MIN_CACHELINE_BYTES )
>      panic("CPU has preposterously small cache lines");
> 
>  /* Function for flushing medium-sized areas.
>   * if ''range'' is large enough we might want to use
model-specific
>   * full-cache flushes. */
>  static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
>  {
>      void *end;
>      unsigned long cacheline_bytes = READ_CP32(CCSIDR);
>      barrier();       /* So the compiler issues all writes to the range */
>      dsb();           /* So the CPU issues all writes to the range */ 
>      for ( end = p + size; p < end; p += cacheline_bytes )
>          WRITE_CP32(DCCMVAC, p);
>      dsb();           /* So we know the flushes happen before continuing */
>  }
> 
>  /* Macro for flushing a single small item.  The predicate is always 
>   * compile-time constant so this will compile down to 3 instructions in
>   * the common case.  Make sure to call it with the correct type of
>   * pointer! */
>  #define flush_xen_dcache_va(p) do {                       \
>      typeof(p) _p = (p);                                   \
>      if ( (sizeof *_p) > MIN_CACHELINE_BYTES )             \
>          flush_xen_dcache_va_range(_p, sizeof *_p);        \
>      else                                                  \
>          asm volatile (                                    \
>              "dsb;"   /* Finish all earlier writes */      \
>              STORE_CP32(0, DCCMVAC)                        \
>              "dsb;"   /* Finish flush before continuing */ \
>              : : "r" (_p), "m" (*_p));                 
\
>  } while (0)
> 
> What do you think? 
I think that''s OK, the macro doesn''t look as bad as I thought
it would
:)

Stefano Stabellini

2012-Oct-29 09:53 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Sat, 27 Oct 2012, Tim Deegan wrote:> At 11:44 +0100 on 27 Oct (1351338268), Tim Deegan wrote:
> >  /* Function for flushing medium-sized areas.
> >   * if ''range'' is large enough we might want to use
model-specific
> >   * full-cache flushes. */
> >  static inline void flush_xen_dcache_va_range(void *p, unsigned long
size)
> >  {
> >      void *end;
> >      unsigned long cacheline_bytes = READ_CP32(CCSIDR);
> >      barrier();       /* So the compiler issues all writes to the
range */
> >      dsb();           /* So the CPU issues all writes to the range */ 
> 
> Oh - I just noticed that the way we''ve defined dsb() it includes a
> memory clobber.   So I guess we don''t need barrier() as well
there.
> 
> We might want to look at the other users of dsb() and see if we want to
> drop the memory clobber from it as well.
> But OTOH we may be getting way into premature optimization already. :) 
My thoughts exactly.

Stefano Stabellini

2012-Nov-13 11:57 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Sat, 27 Oct 2012, Tim Deegan wrote:> At 21:47 +0100 on 26 Oct (1351288040), Ian Campbell wrote:
> > On Fri, 2012-10-26 at 19:42 +0100, Stefano Stabellini wrote:
> > > I think that the problem is the usage of vgic_irq_rank with
registers
> > > that have 1 bit per interrupt.
> > 
> > That''s very plausible indeed.
> > 
> > > diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> > > index 3c3983f..92731b6 100644
> > > --- a/xen/arch/arm/vgic.c
> > > +++ b/xen/arch/arm/vgic.c
> > > @@ -42,13 +42,7 @@
> > >   */
> > >  static inline int REG_RANK_NR(int b, uint32_t n)
> > >  {
> > > -    switch ( b )
> > > -    {
> > > -    case 8: return n >> 3;
> > > -    case 4: return n >> 2;
> > > -    case 2: return n >> 1;
> > > -    default: BUG();
> > > -    }
> > > +    return n / b;
> > 
> > All the infrastructure will fall apart if b isn''t a power of
two, that''s
> > why I used the switch. Can you just add the appropriate case 1
instead?
> > Probably the bug should be a call to an undefined function to make
this
> > a compile time rather than runtime failure too.
> 
> BUILD_BUG_ON((b & -b) != b); ?
I can''t do that:

error: expression in static assertion is not constant

Ian Campbell

2012-Nov-13 12:00 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Tue, 2012-11-13 at 11:57 +0000, Stefano Stabellini
wrote:> On Sat, 27 Oct 2012, Tim Deegan wrote:
> > At 21:47 +0100 on 26 Oct (1351288040), Ian Campbell wrote:
> > > On Fri, 2012-10-26 at 19:42 +0100, Stefano Stabellini wrote:
> > > > I think that the problem is the usage of vgic_irq_rank with
registers
> > > > that have 1 bit per interrupt.
> > > 
> > > That''s very plausible indeed.
> > > 
> > > > diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> > > > index 3c3983f..92731b6 100644
> > > > --- a/xen/arch/arm/vgic.c
> > > > +++ b/xen/arch/arm/vgic.c
> > > > @@ -42,13 +42,7 @@
> > > >   */
> > > >  static inline int REG_RANK_NR(int b, uint32_t n)
> > > >  {
> > > > -    switch ( b )
> > > > -    {
> > > > -    case 8: return n >> 3;
> > > > -    case 4: return n >> 2;
> > > > -    case 2: return n >> 1;
> > > > -    default: BUG();
> > > > -    }
> > > > +    return n / b;
> > > 
> > > All the infrastructure will fall apart if b isn''t a
power of two, that''s
> > > why I used the switch. Can you just add the appropriate case 1
instead?
> > > Probably the bug should be a call to an undefined function to
make this
> > > a compile time rather than runtime failure too.
> > 
> > BUILD_BUG_ON((b & -b) != b); ?
> 
> I can''t do that:
> 
> error: expression in static assertion is not constant
I suppose you could make REG_RANK_NR a macro (as it''s name implies it
ought to be, oops!).

If not then given that b must be in {1,2,4,8} maybe the switch is
ok/tolerable? If we end up supporting b=16 or 32 then more thought is
probably required around the place anyway.

Ian.

Stefano Stabellini

2012-Nov-13 12:01 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

On Sat, 27 Oct 2012, Tim Deegan wrote:> At 19:40 +0100 on 26 Oct (1351280413), Stefano Stabellini wrote:
> > > No!  It''s always safe to flush the entire line --
regardless of what
> > > other writes might be happening to it.  After all, the cache
controller
> > > might evict that line on its own at any time, so nothing can be
relying
> > > on its _not_ being flushed.
> > > 
> > > The only problem with not knowing how big a cacheline is is this:
if the
> > > object is _bigger_ than a cache line we might use one DCCMVAC
where more
> > > than one is needed.  We can avoid that by a compile-time check on
some
> > > sort of architectural minimum cacheline size.
> >  
> > If we want to do that then we need to pass a size argument to
> > flush_xen_dcache_va and have a loop in the function, each iteration
> > flushing a cacheline:
> > 
> > static inline void flush_xen_dcache_va(void *p, unsigned long size)
> > {
> >     unsigned long cacheline_size = READ_CP32(CCSIDR);
> >     unsigned long i;
> >     for (i = 0; i < size; i += cacheline_size, p += cacheline_size)
{
> >         asm volatile (
> >             "dsb;"
> >             STORE_CP32(0, DCCMVAC)
> >             "dsb;"
> >             : : "r" (p));
> >     }
> > }
> 
> I think we should have two functions.  One should look almost like that
> and be for flushing large ranges, and one much simpler for flushing
> small items.  Like this (totally untested, uncompiled even):
> 
>  #define MIN_CACHELINE_BYTES 32 // or whatever
> 
>  /* In setup.c somewhere. */
>  if ( READ_CP32(CCSIDR) < MIN_CACHELINE_BYTES )
>      panic("CPU has preposterously small cache lines");
> 
>  /* Function for flushing medium-sized areas.
>   * if ''range'' is large enough we might want to use
model-specific
>   * full-cache flushes. */
>  static inline void flush_xen_dcache_va_range(void *p, unsigned long size)
>  {
>      void *end;
>      unsigned long cacheline_bytes = READ_CP32(CCSIDR);
>      barrier();       /* So the compiler issues all writes to the range */
>      dsb();           /* So the CPU issues all writes to the range */ 
>      for ( end = p + size; p < end; p += cacheline_bytes )
>          WRITE_CP32(DCCMVAC, p);
>      dsb();           /* So we know the flushes happen before continuing */
>  }
> 
>  /* Macro for flushing a single small item.  The predicate is always 
>   * compile-time constant so this will compile down to 3 instructions in
>   * the common case.  Make sure to call it with the correct type of
>   * pointer! */
>  #define flush_xen_dcache_va(p) do {                       \
>      typeof(p) _p = (p);                                   \
>      if ( (sizeof *_p) > MIN_CACHELINE_BYTES )             \
>          flush_xen_dcache_va_range(_p, sizeof *_p);        \
>      else                                                  \
>          asm volatile (                                    \
>              "dsb;"   /* Finish all earlier writes */      \
>              STORE_CP32(0, DCCMVAC)                        \
>              "dsb;"   /* Finish flush before continuing */ \
>              : : "r" (_p), "m" (*_p));                 
\
>  } while (0)
> 
> What do you think?
I think that is OK, but I would like to avoid reading CCSIDR every
single time we need to do a dcache flush, I don''t know how slow that
coprocessor read is but I wouldn''t want to have to find out :)

I am thinking of introducing a global variable to hold the cacheline
size and initialize it in start_xen.

Tim Deegan

2012-Nov-13 12:15 UTC

head link

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

At 12:01 +0000 on 13 Nov (1352808094), Stefano Stabellini
wrote:> > I think we should have two functions.  One should look almost like
that
> > and be for flushing large ranges, and one much simpler for flushing
> > small items.  Like this (totally untested, uncompiled even):
> > 
> >  #define MIN_CACHELINE_BYTES 32 // or whatever
> > 
> >  /* In setup.c somewhere. */
> >  if ( READ_CP32(CCSIDR) < MIN_CACHELINE_BYTES )
> >      panic("CPU has preposterously small cache lines");
> > 
> >  /* Function for flushing medium-sized areas.
> >   * if ''range'' is large enough we might want to use
model-specific
> >   * full-cache flushes. */
> >  static inline void flush_xen_dcache_va_range(void *p, unsigned long
size)
> >  {
> >      void *end;
> >      unsigned long cacheline_bytes = READ_CP32(CCSIDR);
> >      barrier();       /* So the compiler issues all writes to the
range */
> >      dsb();           /* So the CPU issues all writes to the range */ 
> >      for ( end = p + size; p < end; p += cacheline_bytes )
> >          WRITE_CP32(DCCMVAC, p);
> >      dsb();           /* So we know the flushes happen before
continuing */
> >  }
> > 
> >  /* Macro for flushing a single small item.  The predicate is always 
> >   * compile-time constant so this will compile down to 3 instructions
in
> >   * the common case.  Make sure to call it with the correct type of
> >   * pointer! */
> >  #define flush_xen_dcache_va(p) do {                       \
> >      typeof(p) _p = (p);                                   \
> >      if ( (sizeof *_p) > MIN_CACHELINE_BYTES )             \
> >          flush_xen_dcache_va_range(_p, sizeof *_p);        \
> >      else                                                  \
> >          asm volatile (                                    \
> >              "dsb;"   /* Finish all earlier writes */      \
> >              STORE_CP32(0, DCCMVAC)                        \
> >              "dsb;"   /* Finish flush before continuing */ \
> >              : : "r" (_p), "m" (*_p));            
\
> >  } while (0)
> > 
> > What do you think?
> 
> I think that is OK, but I would like to avoid reading CCSIDR every
> single time we need to do a dcache flush
The code above only reads it once for each large dcache flush.  When I
was writing it I did think of just stashing cacheline_bytes in a
read_mostly somewhere, but I had the opposite concern -- wouldn''t
reading this constant from on-chip be quicker than going to the memory
bus for it? :)

I''m happy either way.

Tim.

Stefano Stabellini

2012-Nov-13 12:23 UTC

head link

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

On Tue, 13 Nov 2012, Ian Campbell wrote:> On Tue, 2012-11-13 at 11:57 +0000, Stefano Stabellini wrote:
> > On Sat, 27 Oct 2012, Tim Deegan wrote:
> > > At 21:47 +0100 on 26 Oct (1351288040), Ian Campbell wrote:
> > > > On Fri, 2012-10-26 at 19:42 +0100, Stefano Stabellini wrote:
> > > > > I think that the problem is the usage of vgic_irq_rank
with registers
> > > > > that have 1 bit per interrupt.
> > > > 
> > > > That''s very plausible indeed.
> > > > 
> > > > > diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> > > > > index 3c3983f..92731b6 100644
> > > > > --- a/xen/arch/arm/vgic.c
> > > > > +++ b/xen/arch/arm/vgic.c
> > > > > @@ -42,13 +42,7 @@
> > > > >   */
> > > > >  static inline int REG_RANK_NR(int b, uint32_t n)
> > > > >  {
> > > > > -    switch ( b )
> > > > > -    {
> > > > > -    case 8: return n >> 3;
> > > > > -    case 4: return n >> 2;
> > > > > -    case 2: return n >> 1;
> > > > > -    default: BUG();
> > > > > -    }
> > > > > +    return n / b;
> > > > 
> > > > All the infrastructure will fall apart if b isn''t a
power of two, that''s
> > > > why I used the switch. Can you just add the appropriate case
1 instead?
> > > > Probably the bug should be a call to an undefined function
to make this
> > > > a compile time rather than runtime failure too.
> > > 
> > > BUILD_BUG_ON((b & -b) != b); ?
> > 
> > I can''t do that:
> > 
> > error: expression in static assertion is not constant
> 
> I suppose you could make REG_RANK_NR a macro (as it''s name implies
it
> ought to be, oops!).
> 
REG_RANK_NR is called by vgic_irq_rank, so that would need to become a
macro too

> If not then given that b must be in {1,2,4,8} maybe the switch is
> ok/tolerable? If we end up supporting b=16 or 32 then more thought is
> probably required around the place anyway.
yeah I think is OK

Xen devel - Oct 2012 - [PATCH 0/7] xen/arm: run on real hardware

[PATCH 0/7] xen/arm: run on real hardware

[PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

[PATCH 2/7] xen/arm: setup the fixmap in head.S

[PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

[PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

[PATCH 5/7] xen/arm: wake up secondary cpus

[PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

[PATCH 7/7] xen/arm: get the number of cpus from device tree

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 0/7] xen/arm: run on real hardware

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

Re: [PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 7/7] xen/arm: get the number of cpus from device tree

Re: [PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

Re: [PATCH 2/7] xen/arm: setup the fixmap in head.S

Re: [PATCH 3/7] pl011: set baud and clock_hz to the right defaults for Versatile Express

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

Re: [PATCH 4/7] xen/arm: set the SMP bit in the ACTLR register

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 5/7] xen/arm: wake up secondary cpus

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 6/7] xen/arm: flush D-cache and I-cache when appropriate

Re: [PATCH 1/7] xen/arm: fix rank calculation in vgic_vcpu_inject_irq