Hello, This is the third version of this patch series. I have only implemented the VFP context switch support for arm32 and add dummy function to avoid avoid compilation breakage on arm64. For all the changes see each patch. Cheers, Julien Grall (2): xen/arm: don''t enable VFP on XEN during the boot xen/arm32: implement VFP context switch xen/arch/arm/Rules.mk | 2 +- xen/arch/arm/arm32/Makefile | 1 + xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ xen/arch/arm/arm64/Makefile | 1 + xen/arch/arm/arm64/vfp.c | 13 +++++ xen/arch/arm/domain.c | 7 ++- xen/arch/arm/setup.c | 3 -- xen/arch/arm/smpboot.c | 2 - xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ xen/include/asm-arm/arm64/vfp.h | 16 +++++++ xen/include/asm-arm/cpregs.h | 9 ++++ xen/include/asm-arm/domain.h | 4 ++ xen/include/asm-arm/vfp.h | 42 +++++------------ 13 files changed, 202 insertions(+), 38 deletions(-) create mode 100644 xen/arch/arm/arm32/vfp.c create mode 100644 xen/arch/arm/arm64/vfp.c create mode 100644 xen/include/asm-arm/arm32/vfp.h create mode 100644 xen/include/asm-arm/arm64/vfp.h -- 1.7.10.4
Julien Grall
2013-Jun-03 14:00 UTC
[PATCH v3 1/2] xen/arm: don''t enable VFP on XEN during the boot
We can safely remove VFP support in XEN because: - the guest will enable VFP support when a process requires it - XEN doesn''t use VFP Signed-off-by: Julien Grall <julien.grall@linaro.org> Changes in v2: - Remove asm/vfp.h and all the inclusions of this header --- xen/arch/arm/Rules.mk | 2 +- xen/arch/arm/setup.c | 3 --- xen/arch/arm/smpboot.c | 2 -- xen/include/asm-arm/vfp.h | 43 ------------------------------------------- 4 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 xen/include/asm-arm/vfp.h diff --git a/xen/arch/arm/Rules.mk b/xen/arch/arm/Rules.mk index 422ed04..a18e7fd 100644 --- a/xen/arch/arm/Rules.mk +++ b/xen/arch/arm/Rules.mk @@ -22,7 +22,7 @@ arm := y ifeq ($(TARGET_SUBARCH),arm32) # Prevent floating-point variables from creeping into Xen. CFLAGS += -msoft-float -CFLAGS += -mcpu=cortex-a15 -mfpu=vfpv3 -mfloat-abi=softfp +CFLAGS += -mcpu=cortex-a15 arm32 := y arm64 := n endif diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c index da2a734..b192d15 100644 --- a/xen/arch/arm/setup.c +++ b/xen/arch/arm/setup.c @@ -38,7 +38,6 @@ #include <asm/page.h> #include <asm/current.h> #include <asm/setup.h> -#include <asm/vfp.h> #include <asm/early_printk.h> #include <asm/gic.h> #include <asm/cpufeature.h> @@ -457,8 +456,6 @@ void __init start_xen(unsigned long boot_phys_offset, setup_virt_paging(); - enable_vfp(); - softirq_init(); tasklet_subsys_init(); diff --git a/xen/arch/arm/smpboot.c b/xen/arch/arm/smpboot.c index 8011987..c7421fc 100644 --- a/xen/arch/arm/smpboot.c +++ b/xen/arch/arm/smpboot.c @@ -28,7 +28,6 @@ #include <xen/softirq.h> #include <xen/timer.h> #include <xen/irq.h> -#include <asm/vfp.h> #include <asm/gic.h> cpumask_t cpu_online_map; @@ -153,7 +152,6 @@ void __cpuinit start_secondary(unsigned long boot_phys_offset, setup_virt_paging(); mmu_init_secondary_cpu(); - enable_vfp(); gic_init_secondary_cpu(); diff --git a/xen/include/asm-arm/vfp.h b/xen/include/asm-arm/vfp.h deleted file mode 100644 index b800816..0000000 --- a/xen/include/asm-arm/vfp.h +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef __ARM_VFP_H_ -#define __ARM_VFP_H_ - -#include <xen/types.h> - - -#ifdef CONFIG_ARM_32 - -#define FPEXC_EN (1u << 30) - -/* Save and restore FP state. - * Ought to be using the new vmrs/vmsr names, but older binutils has a - * bug where it only allows them to target fpscr (and not, say, fpexc). */ -#define READ_FP(reg) ({ \ - uint32_t val; \ - asm volatile ("fmrx %0, fp" #reg : "=r" (val)); \ - val; }) - -#define WRITE_FP(reg, val) do { \ - asm volatile ("fmxr fp" #reg ", %0" : : "r" (val)); \ -} while (0) - -/* Start-of-day: Turn on VFP */ -static inline void enable_vfp(void) -{ - WRITE_FP(exc, READ_FP(exc) | FPEXC_EN); -} -#else -static inline void enable_vfp(void) -{ - /* Always enable on 64-bit */ -} -#endif - -#endif -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - */ -- 1.7.10.4
Add support for VFP context switch on arm32 and a dummy support for arm64 Signed-off-by: Julien Grall <julien.grall@linaro.org> Changes in v3: - Add vfp_init to check if the processor supports VFP 3 - Add clobber memory - Remove tmps - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h Changes in v2: - Fix all the small errors (type, lost headers...) - Add some comments --- xen/arch/arm/arm32/Makefile | 1 + xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ xen/arch/arm/arm64/Makefile | 1 + xen/arch/arm/arm64/vfp.c | 13 +++++ xen/arch/arm/domain.c | 7 ++- xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ xen/include/asm-arm/arm64/vfp.h | 16 +++++++ xen/include/asm-arm/cpregs.h | 9 ++++ xen/include/asm-arm/domain.h | 4 ++ xen/include/asm-arm/vfp.h | 25 ++++++++++ 10 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 xen/arch/arm/arm32/vfp.c create mode 100644 xen/arch/arm/arm64/vfp.c create mode 100644 xen/include/asm-arm/arm32/vfp.h create mode 100644 xen/include/asm-arm/arm64/vfp.h create mode 100644 xen/include/asm-arm/vfp.h diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile index aaf277a..b903803 100644 --- a/xen/arch/arm/arm32/Makefile +++ b/xen/arch/arm/arm32/Makefile @@ -6,5 +6,6 @@ obj-y += proc-ca15.o obj-y += traps.o obj-y += domain.o +obj-y += vfp.o obj-$(EARLY_PRINTK) += debug.o diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c new file mode 100644 index 0000000..2ece43d --- /dev/null +++ b/xen/arch/arm/arm32/vfp.c @@ -0,0 +1,99 @@ +#include <xen/sched.h> +#include <xen/init.h> +#include <asm/processor.h> +#include <asm/vfp.h> + +void vfp_save_state(struct vcpu *v) +{ + v->arch.vfp.fpexc = READ_CP32(FPEXC); + + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); + + v->arch.vfp.fpscr = READ_CP32(FPSCR); + + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ + { + v->arch.vfp.fpinst = READ_CP32(FPINST); + + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); + /* Disable FPEXC_EX */ + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); + } + + /* Save {d0-d15} */ + asm volatile("stc p11, cr0, [%0], #32*4" + : : "r" (v->arch.vfp.fpregs1) + : "memory"); + + /* 32 x 64 bits registers? */ + if ( (READ_CP32(MVFR0) & MVFR0_A_SIMD_MASK) == 2 ) + { + /* Save {d16-d31} */ + asm volatile("stcl p11, cr0, [%0], #32*4" + : : "r" (v->arch.vfp.fpregs2) + : "memory"); + } + + WRITE_CP32(v->arch.vfp.fpexc & ~(FPEXC_EN), FPEXC); +} + +void vfp_restore_state(struct vcpu *v) +{ + WRITE_CP32(READ_CP32(FPEXC) | FPEXC_EN, FPEXC); + + /* Restore {d0-d15} */ + asm volatile("ldc p11, cr0, [%0], #32*4" + : : "r" (v->arch.vfp.fpregs1) + : "memory"); + + /* 32 x 64 bits registers? */ + if ( (READ_CP32(MVFR0) & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */ + /* Restore {d16-d31} */ + asm volatile("ldcl p11, cr0, [%0], #32*4" + : : "r" (v->arch.vfp.fpregs2) + : "memory"); + + if ( v->arch.vfp.fpexc & FPEXC_EX ) + { + WRITE_CP32(v->arch.vfp.fpinst, FPINST); + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) + WRITE_CP32(v->arch.vfp.fpinst2, FPINST2); + } + + WRITE_CP32(v->arch.vfp.fpscr, FPSCR); + + WRITE_CP32(v->arch.vfp.fpexc, FPEXC); +} + +static __init int vfp_init(void) +{ + unsigned int vfpsid; + unsigned int vfparch; + + vfpsid = READ_CP32(FPSID); + + printk("VFP implementer 0x%02x architecture %d part 0x%02x variant 0x%x " + "rev 0x%x\n", + (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT, + (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT, + (vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT, + (vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT, + (vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT); + + vfparch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; + if ( vfparch < 2 ) + panic("Xen only support VFP 3\n"); + + return 0; +} +presmp_initcall(vfp_init); + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile index 9484548..e06a0a9 100644 --- a/xen/arch/arm/arm64/Makefile +++ b/xen/arch/arm/arm64/Makefile @@ -5,5 +5,6 @@ obj-y += mode_switch.o obj-y += traps.o obj-y += domain.o +obj-y += vfp.o obj-$(EARLY_PRINTK) += debug.o diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c new file mode 100644 index 0000000..74e6a50 --- /dev/null +++ b/xen/arch/arm/arm64/vfp.c @@ -0,0 +1,13 @@ +#include <xen/sched.h> +#include <asm/processor.h> +#include <asm/vfp.h> + +void vfp_save_state(struct vcpu *v) +{ + /* TODO: implement it */ +} + +void vfp_restore_state(struct vcpu *v) +{ + /* TODO: implement it */ +} diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index 4c434a1..f465ab7 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -27,6 +27,7 @@ #include <asm/p2m.h> #include <asm/irq.h> #include <asm/cpufeature.h> +#include <asm/vfp.h> #include <asm/gic.h> #include "vtimer.h" @@ -117,7 +118,8 @@ static void ctxt_switch_from(struct vcpu *p) /* XXX MPU */ - /* XXX VFP */ + /* VFP */ + vfp_save_state(p); /* VGIC */ gic_save_state(p); @@ -143,7 +145,8 @@ static void ctxt_switch_to(struct vcpu *n) /* VGIC */ gic_restore_state(n); - /* XXX VFP */ + /* VFP */ + vfp_restore_state(n); /* XXX MPU */ diff --git a/xen/include/asm-arm/arm32/vfp.h b/xen/include/asm-arm/arm32/vfp.h new file mode 100644 index 0000000..bade3bc --- /dev/null +++ b/xen/include/asm-arm/arm32/vfp.h @@ -0,0 +1,41 @@ +#ifndef _ARM_ARM32_VFP_H +#define _ARM_ARM32_VFP_H + +#define FPEXC_EX (1u << 31) +#define FPEXC_EN (1u << 30) +#define FPEXC_FP2V (1u << 28) + +#define MVFR0_A_SIMD_MASK (0xf << 0) + + +#define FPSID_IMPLEMENTER_BIT (24) +#define FPSID_IMPLEMENTER_MASK (0xff << FPSID_IMPLEMENTER_BIT) +#define FPSID_ARCH_BIT (16) +#define FPSID_ARCH_MASK (0xf << FPSID_ARCH_BIT) +#define FPSID_PART_BIT (8) +#define FPSID_PART_MASK (0xff << FPSID_PART_BIT) +#define FPSID_VARIANT_BIT (4) +#define FPSID_VARIANT_MASK (0xf << FPSID_VARIANT_BIT) +#define FPSID_REV_BIT (0) +#define FPSID_REV_MASK (0xf << FPSID_REV_BIT) + +struct vfp_state +{ + uint64_t fpregs1[16]; /* {d0-d15} */ + uint64_t fpregs2[16]; /* {d16-d31} */ + uint32_t fpexc; + uint32_t fpscr; + /* VFP implementation specific state */ + uint32_t fpinst; + uint32_t fpinst2; +}; + +#endif /* _ARM_ARM32_VFP_H */ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h new file mode 100644 index 0000000..3733d2c --- /dev/null +++ b/xen/include/asm-arm/arm64/vfp.h @@ -0,0 +1,16 @@ +#ifndef _ARM_ARM64_VFP_H +#define _ARM_ARM64_VFP_H + +struct vfp_state +{ +}; + +#endif /* _ARM_ARM64_VFP_H */ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/asm-arm/cpregs.h b/xen/include/asm-arm/cpregs.h index f08d59a..122dd1a 100644 --- a/xen/include/asm-arm/cpregs.h +++ b/xen/include/asm-arm/cpregs.h @@ -60,6 +60,15 @@ * arguments, which are cp,opc1,crn,crm,opc2. */ +/* Coprocessor 10 */ + +#define FPSID p10,7,c0,c0,0 /* Floating-Point System ID Register */ +#define FPSCR p10,7,c1,c0,0 /* Floating-Point Status and Control Register */ +#define MVFR0 p10,7,c7,c0,0 /* Media and VFP Feature Register 0 */ +#define FPEXC p10,7,c8,c0,0 /* Floating-Point Exception Control Register */ +#define FPINST p10,7,c9,c0,0 /* Floating-Point Instruction Register */ +#define FPINST2 p10,7,c10,c0,0 /* Floating-point Instruction Register 2 */ + /* Coprocessor 14 */ /* CP14 CR0: */ diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h index cb251cc..339b6e6 100644 --- a/xen/include/asm-arm/domain.h +++ b/xen/include/asm-arm/domain.h @@ -6,6 +6,7 @@ #include <xen/sched.h> #include <asm/page.h> #include <asm/p2m.h> +#include <asm/vfp.h> #include <public/hvm/params.h> /* Represents state corresponding to a block of 32 interrupts */ @@ -188,6 +189,9 @@ struct arch_vcpu uint32_t joscr, jmcr; #endif + /* Float-pointer */ + struct vfp_state vfp; + /* CP 15 */ uint32_t csselr; diff --git a/xen/include/asm-arm/vfp.h b/xen/include/asm-arm/vfp.h new file mode 100644 index 0000000..5f10fe5 --- /dev/null +++ b/xen/include/asm-arm/vfp.h @@ -0,0 +1,25 @@ +#ifndef _ASM_VFP_H +#define _ASM_VFP_H + +#include <xen/sched.h> + +#if defined(CONFIG_ARM_32) +# include <asm/arm32/vfp.h> +#elif defined(CONFIG_ARM_64) +# include <asm/arm64/vfp.h> +#else +# error "Unknown ARM variant" +#endif + +void vfp_save_state(struct vcpu *v); +void vfp_restore_state(struct vcpu *v); + +#endif /* _ASM_VFP_H */ +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + */ -- 1.7.10.4
Ian Campbell
2013-Jun-03 14:15 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:> Add support for VFP context switch on arm32 and a dummy support for arm64 > > Signed-off-by: Julien Grall <julien.grall@linaro.org> > > Changes in v3: > - Add vfp_init to check if the processor supports VFP 3 > - Add clobber memory > - Remove tmps > - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h > > Changes in v2: > - Fix all the small errors (type, lost headers...) > - Add some comments > --- > xen/arch/arm/arm32/Makefile | 1 + > xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ > xen/arch/arm/arm64/Makefile | 1 + > xen/arch/arm/arm64/vfp.c | 13 +++++ > xen/arch/arm/domain.c | 7 ++- > xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ > xen/include/asm-arm/arm64/vfp.h | 16 +++++++ > xen/include/asm-arm/cpregs.h | 9 ++++ > xen/include/asm-arm/domain.h | 4 ++ > xen/include/asm-arm/vfp.h | 25 ++++++++++ > 10 files changed, 214 insertions(+), 2 deletions(-) > create mode 100644 xen/arch/arm/arm32/vfp.c > create mode 100644 xen/arch/arm/arm64/vfp.c > create mode 100644 xen/include/asm-arm/arm32/vfp.h > create mode 100644 xen/include/asm-arm/arm64/vfp.h > create mode 100644 xen/include/asm-arm/vfp.h > > diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile > index aaf277a..b903803 100644 > --- a/xen/arch/arm/arm32/Makefile > +++ b/xen/arch/arm/arm32/Makefile > @@ -6,5 +6,6 @@ obj-y += proc-ca15.o > > obj-y += traps.o > obj-y += domain.o > +obj-y += vfp.o > > obj-$(EARLY_PRINTK) += debug.o > diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c > new file mode 100644 > index 0000000..2ece43d > --- /dev/null > +++ b/xen/arch/arm/arm32/vfp.c > @@ -0,0 +1,99 @@ > +#include <xen/sched.h> > +#include <xen/init.h> > +#include <asm/processor.h> > +#include <asm/vfp.h> > + > +void vfp_save_state(struct vcpu *v) > +{ > + v->arch.vfp.fpexc = READ_CP32(FPEXC); > + > + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); > + > + v->arch.vfp.fpscr = READ_CP32(FPSCR); > + > + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ > + { > + v->arch.vfp.fpinst = READ_CP32(FPINST); > + > + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) > + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); > + /* Disable FPEXC_EX */ > + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); > + } > + > + /* Save {d0-d15} */ > + asm volatile("stc p11, cr0, [%0], #32*4" > + : : "r" (v->arch.vfp.fpregs1) > + : "memory");http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do the job without clobbering the whole of memory. I can''t see any examples of this on ARM though, e.g. in Linux (other than =Qo in the atomics) though and I''ve expected this patter to be widespread so perhaps that isn''t right. Alternatively it''s quite a tricky construct to grep for, so maybe I missed all the uses. We do use =m a fair bit in x86 Xen FWIW.> static __init int vfp_init(void) > +{ > + unsigned int vfpsid; > + unsigned int vfparch; > + > + vfpsid = READ_CP32(FPSID); > + > + printk("VFP implementer 0x%02x architecture %d part 0x%02x variant 0x%x " > + "rev 0x%x\n", > + (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT, > + (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT, > + (vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT, > + (vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT, > + (vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT); > + > + vfparch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT; > + if ( vfparch < 2 ) > + panic("Xen only support VFP 3\n");Should be "supports". Don''t bother resending just for that though. Ian.
Julien Grall
2013-Jun-03 14:32 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On 06/03/2013 03:15 PM, Ian Campbell wrote:> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: >> Add support for VFP context switch on arm32 and a dummy support for arm64 >> >> Signed-off-by: Julien Grall <julien.grall@linaro.org> >> >> Changes in v3: >> - Add vfp_init to check if the processor supports VFP 3 >> - Add clobber memory >> - Remove tmps >> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h >> >> Changes in v2: >> - Fix all the small errors (type, lost headers...) >> - Add some comments >> --- >> xen/arch/arm/arm32/Makefile | 1 + >> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ >> xen/arch/arm/arm64/Makefile | 1 + >> xen/arch/arm/arm64/vfp.c | 13 +++++ >> xen/arch/arm/domain.c | 7 ++- >> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ >> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ >> xen/include/asm-arm/cpregs.h | 9 ++++ >> xen/include/asm-arm/domain.h | 4 ++ >> xen/include/asm-arm/vfp.h | 25 ++++++++++ >> 10 files changed, 214 insertions(+), 2 deletions(-) >> create mode 100644 xen/arch/arm/arm32/vfp.c >> create mode 100644 xen/arch/arm/arm64/vfp.c >> create mode 100644 xen/include/asm-arm/arm32/vfp.h >> create mode 100644 xen/include/asm-arm/arm64/vfp.h >> create mode 100644 xen/include/asm-arm/vfp.h >> >> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile >> index aaf277a..b903803 100644 >> --- a/xen/arch/arm/arm32/Makefile >> +++ b/xen/arch/arm/arm32/Makefile >> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o >> >> obj-y += traps.o >> obj-y += domain.o >> +obj-y += vfp.o >> >> obj-$(EARLY_PRINTK) += debug.o >> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c >> new file mode 100644 >> index 0000000..2ece43d >> --- /dev/null >> +++ b/xen/arch/arm/arm32/vfp.c >> @@ -0,0 +1,99 @@ >> +#include <xen/sched.h> >> +#include <xen/init.h> >> +#include <asm/processor.h> >> +#include <asm/vfp.h> >> + >> +void vfp_save_state(struct vcpu *v) >> +{ >> + v->arch.vfp.fpexc = READ_CP32(FPEXC); >> + >> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); >> + >> + v->arch.vfp.fpscr = READ_CP32(FPSCR); >> + >> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ >> + { >> + v->arch.vfp.fpinst = READ_CP32(FPINST); >> + >> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) >> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); >> + /* Disable FPEXC_EX */ >> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); >> + } >> + >> + /* Save {d0-d15} */ >> + asm volatile("stc p11, cr0, [%0], #32*4" >> + : : "r" (v->arch.vfp.fpregs1) >> + : "memory"); > > http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that > "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do > the job without clobbering the whole of memory.I''m not sure to fully understand the concept behind "=m". Does it mean that gcc will clobber all the memory range addressed by fpregs? -- Julien
Ian Campbell
2013-Jun-03 14:38 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:> On 06/03/2013 03:15 PM, Ian Campbell wrote: > > > On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: > >> Add support for VFP context switch on arm32 and a dummy support for arm64 > >> > >> Signed-off-by: Julien Grall <julien.grall@linaro.org> > >> > >> Changes in v3: > >> - Add vfp_init to check if the processor supports VFP 3 > >> - Add clobber memory > >> - Remove tmps > >> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h > >> > >> Changes in v2: > >> - Fix all the small errors (type, lost headers...) > >> - Add some comments > >> --- > >> xen/arch/arm/arm32/Makefile | 1 + > >> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ > >> xen/arch/arm/arm64/Makefile | 1 + > >> xen/arch/arm/arm64/vfp.c | 13 +++++ > >> xen/arch/arm/domain.c | 7 ++- > >> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ > >> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ > >> xen/include/asm-arm/cpregs.h | 9 ++++ > >> xen/include/asm-arm/domain.h | 4 ++ > >> xen/include/asm-arm/vfp.h | 25 ++++++++++ > >> 10 files changed, 214 insertions(+), 2 deletions(-) > >> create mode 100644 xen/arch/arm/arm32/vfp.c > >> create mode 100644 xen/arch/arm/arm64/vfp.c > >> create mode 100644 xen/include/asm-arm/arm32/vfp.h > >> create mode 100644 xen/include/asm-arm/arm64/vfp.h > >> create mode 100644 xen/include/asm-arm/vfp.h > >> > >> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile > >> index aaf277a..b903803 100644 > >> --- a/xen/arch/arm/arm32/Makefile > >> +++ b/xen/arch/arm/arm32/Makefile > >> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o > >> > >> obj-y += traps.o > >> obj-y += domain.o > >> +obj-y += vfp.o > >> > >> obj-$(EARLY_PRINTK) += debug.o > >> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c > >> new file mode 100644 > >> index 0000000..2ece43d > >> --- /dev/null > >> +++ b/xen/arch/arm/arm32/vfp.c > >> @@ -0,0 +1,99 @@ > >> +#include <xen/sched.h> > >> +#include <xen/init.h> > >> +#include <asm/processor.h> > >> +#include <asm/vfp.h> > >> + > >> +void vfp_save_state(struct vcpu *v) > >> +{ > >> + v->arch.vfp.fpexc = READ_CP32(FPEXC); > >> + > >> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); > >> + > >> + v->arch.vfp.fpscr = READ_CP32(FPSCR); > >> + > >> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ > >> + { > >> + v->arch.vfp.fpinst = READ_CP32(FPINST); > >> + > >> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) > >> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); > >> + /* Disable FPEXC_EX */ > >> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); > >> + } > >> + > >> + /* Save {d0-d15} */ > >> + asm volatile("stc p11, cr0, [%0], #32*4" > >> + : : "r" (v->arch.vfp.fpregs1) > >> + : "memory"); > > > > http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that > > "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do > > the job without clobbering the whole of memory. > > I''m not sure to fully understand the concept behind "=m". Does it mean > that gcc will clobber all the memory range addressed by fpregs?I''m not totally confident in this stuff myself.... Apparently the "=" modified means[0] "this operand is write-only for this instruction: the previous value is discarded and replaced by output data." In the case of a memory constraint you''d want to hope that "discarded and replaced" would be equivalent to clobbering the address, at least in cases where the compiler knows the size of the thing, as it does here. Ian. [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
Julien Grall
2013-Jun-03 14:57 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On 06/03/2013 03:38 PM, Ian Campbell wrote:> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote: >> On 06/03/2013 03:15 PM, Ian Campbell wrote: >> >>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: >>>> Add support for VFP context switch on arm32 and a dummy support for arm64 >>>> >>>> Signed-off-by: Julien Grall <julien.grall@linaro.org> >>>> >>>> Changes in v3: >>>> - Add vfp_init to check if the processor supports VFP 3 >>>> - Add clobber memory >>>> - Remove tmps >>>> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h >>>> >>>> Changes in v2: >>>> - Fix all the small errors (type, lost headers...) >>>> - Add some comments >>>> --- >>>> xen/arch/arm/arm32/Makefile | 1 + >>>> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ >>>> xen/arch/arm/arm64/Makefile | 1 + >>>> xen/arch/arm/arm64/vfp.c | 13 +++++ >>>> xen/arch/arm/domain.c | 7 ++- >>>> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ >>>> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ >>>> xen/include/asm-arm/cpregs.h | 9 ++++ >>>> xen/include/asm-arm/domain.h | 4 ++ >>>> xen/include/asm-arm/vfp.h | 25 ++++++++++ >>>> 10 files changed, 214 insertions(+), 2 deletions(-) >>>> create mode 100644 xen/arch/arm/arm32/vfp.c >>>> create mode 100644 xen/arch/arm/arm64/vfp.c >>>> create mode 100644 xen/include/asm-arm/arm32/vfp.h >>>> create mode 100644 xen/include/asm-arm/arm64/vfp.h >>>> create mode 100644 xen/include/asm-arm/vfp.h >>>> >>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile >>>> index aaf277a..b903803 100644 >>>> --- a/xen/arch/arm/arm32/Makefile >>>> +++ b/xen/arch/arm/arm32/Makefile >>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o >>>> >>>> obj-y += traps.o >>>> obj-y += domain.o >>>> +obj-y += vfp.o >>>> >>>> obj-$(EARLY_PRINTK) += debug.o >>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c >>>> new file mode 100644 >>>> index 0000000..2ece43d >>>> --- /dev/null >>>> +++ b/xen/arch/arm/arm32/vfp.c >>>> @@ -0,0 +1,99 @@ >>>> +#include <xen/sched.h> >>>> +#include <xen/init.h> >>>> +#include <asm/processor.h> >>>> +#include <asm/vfp.h> >>>> + >>>> +void vfp_save_state(struct vcpu *v) >>>> +{ >>>> + v->arch.vfp.fpexc = READ_CP32(FPEXC); >>>> + >>>> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); >>>> + >>>> + v->arch.vfp.fpscr = READ_CP32(FPSCR); >>>> + >>>> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ >>>> + { >>>> + v->arch.vfp.fpinst = READ_CP32(FPINST); >>>> + >>>> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) >>>> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); >>>> + /* Disable FPEXC_EX */ >>>> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); >>>> + } >>>> + >>>> + /* Save {d0-d15} */ >>>> + asm volatile("stc p11, cr0, [%0], #32*4" >>>> + : : "r" (v->arch.vfp.fpregs1) >>>> + : "memory"); >>> >>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that >>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do >>> the job without clobbering the whole of memory. >> >> I''m not sure to fully understand the concept behind "=m". Does it mean >> that gcc will clobber all the memory range addressed by fpregs? > > I''m not totally confident in this stuff myself.... > > Apparently the "=" modified means[0] "this operand is write-only for > this instruction: the previous value is discarded and replaced by output > data." In the case of a memory constraint you''d want to hope that > "discarded and replaced" would be equivalent to clobbering the address, > at least in cases where the compiler knows the size of the thing, as it > does here. > > Ian. > > [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers >Shall I use this contrainst for the stc instructions and send a new patch series? -- Julien
Ian Campbell
2013-Jun-03 15:14 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote:> On 06/03/2013 03:38 PM, Ian Campbell wrote: > > > On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote: > >> On 06/03/2013 03:15 PM, Ian Campbell wrote: > >> > >>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: > >>>> Add support for VFP context switch on arm32 and a dummy support for arm64 > >>>> > >>>> Signed-off-by: Julien Grall <julien.grall@linaro.org> > >>>> > >>>> Changes in v3: > >>>> - Add vfp_init to check if the processor supports VFP 3 > >>>> - Add clobber memory > >>>> - Remove tmps > >>>> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h > >>>> > >>>> Changes in v2: > >>>> - Fix all the small errors (type, lost headers...) > >>>> - Add some comments > >>>> --- > >>>> xen/arch/arm/arm32/Makefile | 1 + > >>>> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ > >>>> xen/arch/arm/arm64/Makefile | 1 + > >>>> xen/arch/arm/arm64/vfp.c | 13 +++++ > >>>> xen/arch/arm/domain.c | 7 ++- > >>>> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ > >>>> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ > >>>> xen/include/asm-arm/cpregs.h | 9 ++++ > >>>> xen/include/asm-arm/domain.h | 4 ++ > >>>> xen/include/asm-arm/vfp.h | 25 ++++++++++ > >>>> 10 files changed, 214 insertions(+), 2 deletions(-) > >>>> create mode 100644 xen/arch/arm/arm32/vfp.c > >>>> create mode 100644 xen/arch/arm/arm64/vfp.c > >>>> create mode 100644 xen/include/asm-arm/arm32/vfp.h > >>>> create mode 100644 xen/include/asm-arm/arm64/vfp.h > >>>> create mode 100644 xen/include/asm-arm/vfp.h > >>>> > >>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile > >>>> index aaf277a..b903803 100644 > >>>> --- a/xen/arch/arm/arm32/Makefile > >>>> +++ b/xen/arch/arm/arm32/Makefile > >>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o > >>>> > >>>> obj-y += traps.o > >>>> obj-y += domain.o > >>>> +obj-y += vfp.o > >>>> > >>>> obj-$(EARLY_PRINTK) += debug.o > >>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c > >>>> new file mode 100644 > >>>> index 0000000..2ece43d > >>>> --- /dev/null > >>>> +++ b/xen/arch/arm/arm32/vfp.c > >>>> @@ -0,0 +1,99 @@ > >>>> +#include <xen/sched.h> > >>>> +#include <xen/init.h> > >>>> +#include <asm/processor.h> > >>>> +#include <asm/vfp.h> > >>>> + > >>>> +void vfp_save_state(struct vcpu *v) > >>>> +{ > >>>> + v->arch.vfp.fpexc = READ_CP32(FPEXC); > >>>> + > >>>> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); > >>>> + > >>>> + v->arch.vfp.fpscr = READ_CP32(FPSCR); > >>>> + > >>>> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ > >>>> + { > >>>> + v->arch.vfp.fpinst = READ_CP32(FPINST); > >>>> + > >>>> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) > >>>> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); > >>>> + /* Disable FPEXC_EX */ > >>>> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); > >>>> + } > >>>> + > >>>> + /* Save {d0-d15} */ > >>>> + asm volatile("stc p11, cr0, [%0], #32*4" > >>>> + : : "r" (v->arch.vfp.fpregs1) > >>>> + : "memory"); > >>> > >>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that > >>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do > >>> the job without clobbering the whole of memory. > >> > >> I''m not sure to fully understand the concept behind "=m". Does it mean > >> that gcc will clobber all the memory range addressed by fpregs? > > > > I''m not totally confident in this stuff myself.... > > > > Apparently the "=" modified means[0] "this operand is write-only for > > this instruction: the previous value is discarded and replaced by output > > data." In the case of a memory constraint you''d want to hope that > > "discarded and replaced" would be equivalent to clobbering the address, > > at least in cases where the compiler knows the size of the thing, as it > > does here. > > > > Ian. > > > > [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers > > > > > Shall I use this contrainst for the stc instructions and send a new > patch series?If you buy my reasoning and if it works then I guess so. Ian.
Julien Grall
2013-Jun-03 15:27 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On 06/03/2013 04:14 PM, Ian Campbell wrote:> On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote: >> On 06/03/2013 03:38 PM, Ian Campbell wrote: >> >>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote: >>>> On 06/03/2013 03:15 PM, Ian Campbell wrote: >>>> >>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: >>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64 >>>>>> >>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org> >>>>>> >>>>>> Changes in v3: >>>>>> - Add vfp_init to check if the processor supports VFP 3 >>>>>> - Add clobber memory >>>>>> - Remove tmps >>>>>> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h >>>>>> >>>>>> Changes in v2: >>>>>> - Fix all the small errors (type, lost headers...) >>>>>> - Add some comments >>>>>> --- >>>>>> xen/arch/arm/arm32/Makefile | 1 + >>>>>> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ >>>>>> xen/arch/arm/arm64/Makefile | 1 + >>>>>> xen/arch/arm/arm64/vfp.c | 13 +++++ >>>>>> xen/arch/arm/domain.c | 7 ++- >>>>>> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ >>>>>> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ >>>>>> xen/include/asm-arm/cpregs.h | 9 ++++ >>>>>> xen/include/asm-arm/domain.h | 4 ++ >>>>>> xen/include/asm-arm/vfp.h | 25 ++++++++++ >>>>>> 10 files changed, 214 insertions(+), 2 deletions(-) >>>>>> create mode 100644 xen/arch/arm/arm32/vfp.c >>>>>> create mode 100644 xen/arch/arm/arm64/vfp.c >>>>>> create mode 100644 xen/include/asm-arm/arm32/vfp.h >>>>>> create mode 100644 xen/include/asm-arm/arm64/vfp.h >>>>>> create mode 100644 xen/include/asm-arm/vfp.h >>>>>> >>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile >>>>>> index aaf277a..b903803 100644 >>>>>> --- a/xen/arch/arm/arm32/Makefile >>>>>> +++ b/xen/arch/arm/arm32/Makefile >>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o >>>>>> >>>>>> obj-y += traps.o >>>>>> obj-y += domain.o >>>>>> +obj-y += vfp.o >>>>>> >>>>>> obj-$(EARLY_PRINTK) += debug.o >>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c >>>>>> new file mode 100644 >>>>>> index 0000000..2ece43d >>>>>> --- /dev/null >>>>>> +++ b/xen/arch/arm/arm32/vfp.c >>>>>> @@ -0,0 +1,99 @@ >>>>>> +#include <xen/sched.h> >>>>>> +#include <xen/init.h> >>>>>> +#include <asm/processor.h> >>>>>> +#include <asm/vfp.h> >>>>>> + >>>>>> +void vfp_save_state(struct vcpu *v) >>>>>> +{ >>>>>> + v->arch.vfp.fpexc = READ_CP32(FPEXC); >>>>>> + >>>>>> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); >>>>>> + >>>>>> + v->arch.vfp.fpscr = READ_CP32(FPSCR); >>>>>> + >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ >>>>>> + { >>>>>> + v->arch.vfp.fpinst = READ_CP32(FPINST); >>>>>> + >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) >>>>>> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); >>>>>> + /* Disable FPEXC_EX */ >>>>>> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); >>>>>> + } >>>>>> + >>>>>> + /* Save {d0-d15} */ >>>>>> + asm volatile("stc p11, cr0, [%0], #32*4" >>>>>> + : : "r" (v->arch.vfp.fpregs1) >>>>>> + : "memory"); >>>>> >>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that >>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do >>>>> the job without clobbering the whole of memory. >>>> >>>> I''m not sure to fully understand the concept behind "=m". Does it mean >>>> that gcc will clobber all the memory range addressed by fpregs? >>> >>> I''m not totally confident in this stuff myself.... >>> >>> Apparently the "=" modified means[0] "this operand is write-only for >>> this instruction: the previous value is discarded and replaced by output >>> data." In the case of a memory constraint you''d want to hope that >>> "discarded and replaced" would be equivalent to clobbering the address, >>> at least in cases where the compiler knows the size of the thing, as it >>> does here. >>> >>> Ian. >>> >>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers >>> >> >> >> Shall I use this contrainst for the stc instructions and send a new >> patch series? > > If you buy my reasoning and if it works then I guess so.I will give a try. -- Julien
Julien Grall
2013-Jun-05 13:06 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On 06/03/2013 04:14 PM, Ian Campbell wrote:> On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote: >> On 06/03/2013 03:38 PM, Ian Campbell wrote: >> >>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote: >>>> On 06/03/2013 03:15 PM, Ian Campbell wrote: >>>> >>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: >>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64 >>>>>> >>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org> >>>>>> >>>>>> Changes in v3: >>>>>> - Add vfp_init to check if the processor supports VFP 3 >>>>>> - Add clobber memory >>>>>> - Remove tmps >>>>>> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h >>>>>> >>>>>> Changes in v2: >>>>>> - Fix all the small errors (type, lost headers...) >>>>>> - Add some comments >>>>>> --- >>>>>> xen/arch/arm/arm32/Makefile | 1 + >>>>>> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ >>>>>> xen/arch/arm/arm64/Makefile | 1 + >>>>>> xen/arch/arm/arm64/vfp.c | 13 +++++ >>>>>> xen/arch/arm/domain.c | 7 ++- >>>>>> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ >>>>>> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ >>>>>> xen/include/asm-arm/cpregs.h | 9 ++++ >>>>>> xen/include/asm-arm/domain.h | 4 ++ >>>>>> xen/include/asm-arm/vfp.h | 25 ++++++++++ >>>>>> 10 files changed, 214 insertions(+), 2 deletions(-) >>>>>> create mode 100644 xen/arch/arm/arm32/vfp.c >>>>>> create mode 100644 xen/arch/arm/arm64/vfp.c >>>>>> create mode 100644 xen/include/asm-arm/arm32/vfp.h >>>>>> create mode 100644 xen/include/asm-arm/arm64/vfp.h >>>>>> create mode 100644 xen/include/asm-arm/vfp.h >>>>>> >>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile >>>>>> index aaf277a..b903803 100644 >>>>>> --- a/xen/arch/arm/arm32/Makefile >>>>>> +++ b/xen/arch/arm/arm32/Makefile >>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o >>>>>> >>>>>> obj-y += traps.o >>>>>> obj-y += domain.o >>>>>> +obj-y += vfp.o >>>>>> >>>>>> obj-$(EARLY_PRINTK) += debug.o >>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c >>>>>> new file mode 100644 >>>>>> index 0000000..2ece43d >>>>>> --- /dev/null >>>>>> +++ b/xen/arch/arm/arm32/vfp.c >>>>>> @@ -0,0 +1,99 @@ >>>>>> +#include <xen/sched.h> >>>>>> +#include <xen/init.h> >>>>>> +#include <asm/processor.h> >>>>>> +#include <asm/vfp.h> >>>>>> + >>>>>> +void vfp_save_state(struct vcpu *v) >>>>>> +{ >>>>>> + v->arch.vfp.fpexc = READ_CP32(FPEXC); >>>>>> + >>>>>> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); >>>>>> + >>>>>> + v->arch.vfp.fpscr = READ_CP32(FPSCR); >>>>>> + >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ >>>>>> + { >>>>>> + v->arch.vfp.fpinst = READ_CP32(FPINST); >>>>>> + >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) >>>>>> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); >>>>>> + /* Disable FPEXC_EX */ >>>>>> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); >>>>>> + } >>>>>> + >>>>>> + /* Save {d0-d15} */ >>>>>> + asm volatile("stc p11, cr0, [%0], #32*4" >>>>>> + : : "r" (v->arch.vfp.fpregs1) >>>>>> + : "memory"); >>>>> >>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that >>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do >>>>> the job without clobbering the whole of memory. >>>> >>>> I''m not sure to fully understand the concept behind "=m". Does it mean >>>> that gcc will clobber all the memory range addressed by fpregs? >>> >>> I''m not totally confident in this stuff myself.... >>> >>> Apparently the "=" modified means[0] "this operand is write-only for >>> this instruction: the previous value is discarded and replaced by output >>> data." In the case of a memory constraint you''d want to hope that >>> "discarded and replaced" would be equivalent to clobbering the address, >>> at least in cases where the compiler knows the size of the thing, as it >>> does here. >>> >>> Ian. >>> >>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers >>> >> >> >> Shall I use this contrainst for the stc instructions and send a new >> patch series? > > If you buy my reasoning and if it works then I guess so.Hi, So I gave a try and it doesn''t work. The modifiers =m replaces %0 by [rn,imm] which is wrong. So it''s not possible to specify the amount of data which the instruction need to read (the #32*4) because pre-index and post-index aren''t allowed in a same instruction. If I remove #32*4, the instruction is different. With objdump on each generate object: - vstmia r3!, {d0,d15} - vstr d0, [rn, imm] -- Julien
Ian Campbell
2013-Jun-12 15:14 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On Wed, 2013-06-05 at 14:06 +0100, Julien Grall wrote:> On 06/03/2013 04:14 PM, Ian Campbell wrote: > > > On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote: > >> On 06/03/2013 03:38 PM, Ian Campbell wrote: > >> > >>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote: > >>>> On 06/03/2013 03:15 PM, Ian Campbell wrote: > >>>> > >>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: > >>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64 > >>>>>> > >>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org> > >>>>>> > >>>>>> Changes in v3: > >>>>>> - Add vfp_init to check if the processor supports VFP 3 > >>>>>> - Add clobber memory > >>>>>> - Remove tmps > >>>>>> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h > >>>>>> > >>>>>> Changes in v2: > >>>>>> - Fix all the small errors (type, lost headers...) > >>>>>> - Add some comments > >>>>>> --- > >>>>>> xen/arch/arm/arm32/Makefile | 1 + > >>>>>> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ > >>>>>> xen/arch/arm/arm64/Makefile | 1 + > >>>>>> xen/arch/arm/arm64/vfp.c | 13 +++++ > >>>>>> xen/arch/arm/domain.c | 7 ++- > >>>>>> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ > >>>>>> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ > >>>>>> xen/include/asm-arm/cpregs.h | 9 ++++ > >>>>>> xen/include/asm-arm/domain.h | 4 ++ > >>>>>> xen/include/asm-arm/vfp.h | 25 ++++++++++ > >>>>>> 10 files changed, 214 insertions(+), 2 deletions(-) > >>>>>> create mode 100644 xen/arch/arm/arm32/vfp.c > >>>>>> create mode 100644 xen/arch/arm/arm64/vfp.c > >>>>>> create mode 100644 xen/include/asm-arm/arm32/vfp.h > >>>>>> create mode 100644 xen/include/asm-arm/arm64/vfp.h > >>>>>> create mode 100644 xen/include/asm-arm/vfp.h > >>>>>> > >>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile > >>>>>> index aaf277a..b903803 100644 > >>>>>> --- a/xen/arch/arm/arm32/Makefile > >>>>>> +++ b/xen/arch/arm/arm32/Makefile > >>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o > >>>>>> > >>>>>> obj-y += traps.o > >>>>>> obj-y += domain.o > >>>>>> +obj-y += vfp.o > >>>>>> > >>>>>> obj-$(EARLY_PRINTK) += debug.o > >>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c > >>>>>> new file mode 100644 > >>>>>> index 0000000..2ece43d > >>>>>> --- /dev/null > >>>>>> +++ b/xen/arch/arm/arm32/vfp.c > >>>>>> @@ -0,0 +1,99 @@ > >>>>>> +#include <xen/sched.h> > >>>>>> +#include <xen/init.h> > >>>>>> +#include <asm/processor.h> > >>>>>> +#include <asm/vfp.h> > >>>>>> + > >>>>>> +void vfp_save_state(struct vcpu *v) > >>>>>> +{ > >>>>>> + v->arch.vfp.fpexc = READ_CP32(FPEXC); > >>>>>> + > >>>>>> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); > >>>>>> + > >>>>>> + v->arch.vfp.fpscr = READ_CP32(FPSCR); > >>>>>> + > >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ > >>>>>> + { > >>>>>> + v->arch.vfp.fpinst = READ_CP32(FPINST); > >>>>>> + > >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) > >>>>>> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); > >>>>>> + /* Disable FPEXC_EX */ > >>>>>> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); > >>>>>> + } > >>>>>> + > >>>>>> + /* Save {d0-d15} */ > >>>>>> + asm volatile("stc p11, cr0, [%0], #32*4" > >>>>>> + : : "r" (v->arch.vfp.fpregs1) > >>>>>> + : "memory"); > >>>>> > >>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that > >>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do > >>>>> the job without clobbering the whole of memory. > >>>> > >>>> I''m not sure to fully understand the concept behind "=m". Does it mean > >>>> that gcc will clobber all the memory range addressed by fpregs? > >>> > >>> I''m not totally confident in this stuff myself.... > >>> > >>> Apparently the "=" modified means[0] "this operand is write-only for > >>> this instruction: the previous value is discarded and replaced by output > >>> data." In the case of a memory constraint you''d want to hope that > >>> "discarded and replaced" would be equivalent to clobbering the address, > >>> at least in cases where the compiler knows the size of the thing, as it > >>> does here. > >>> > >>> Ian. > >>> > >>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers > >>> > >> > >> > >> Shall I use this contrainst for the stc instructions and send a new > >> patch series? > > > > If you buy my reasoning and if it works then I guess so. > > > Hi, > > So I gave a try and it doesn''t work.That''s a shame, oh well we''ll have to use the big clobber then -- not the end of the world.> The modifiers =m replaces %0 by [rn,imm] which is wrong. So it''s not > possible to specify the amount of data which the instruction need to > read (the #32*4) because pre-index and post-index aren''t allowed in a > same instruction. > > If I remove #32*4, the instruction is different. With objdump on each > generate object: > - vstmia r3!, {d0,d15} > - vstr d0, [rn, imm] > > -- > Julien >
Ian Campbell
2013-Jun-13 16:39 UTC
Re: [PATCH v3 2/2] xen/arm32: implement VFP context switch
On Wed, 2013-06-12 at 16:14 +0100, Ian Campbell wrote:> On Wed, 2013-06-05 at 14:06 +0100, Julien Grall wrote: > > On 06/03/2013 04:14 PM, Ian Campbell wrote: > > > > > On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote: > > >> On 06/03/2013 03:38 PM, Ian Campbell wrote: > > >> > > >>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote: > > >>>> On 06/03/2013 03:15 PM, Ian Campbell wrote: > > >>>> > > >>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote: > > >>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64 > > >>>>>> > > >>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org> > > >>>>>> > > >>>>>> Changes in v3: > > >>>>>> - Add vfp_init to check if the processor supports VFP 3 > > >>>>>> - Add clobber memory > > >>>>>> - Remove tmps > > >>>>>> - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h > > >>>>>> > > >>>>>> Changes in v2: > > >>>>>> - Fix all the small errors (type, lost headers...) > > >>>>>> - Add some comments > > >>>>>> --- > > >>>>>> xen/arch/arm/arm32/Makefile | 1 + > > >>>>>> xen/arch/arm/arm32/vfp.c | 99 +++++++++++++++++++++++++++++++++++++++ > > >>>>>> xen/arch/arm/arm64/Makefile | 1 + > > >>>>>> xen/arch/arm/arm64/vfp.c | 13 +++++ > > >>>>>> xen/arch/arm/domain.c | 7 ++- > > >>>>>> xen/include/asm-arm/arm32/vfp.h | 41 ++++++++++++++++ > > >>>>>> xen/include/asm-arm/arm64/vfp.h | 16 +++++++ > > >>>>>> xen/include/asm-arm/cpregs.h | 9 ++++ > > >>>>>> xen/include/asm-arm/domain.h | 4 ++ > > >>>>>> xen/include/asm-arm/vfp.h | 25 ++++++++++ > > >>>>>> 10 files changed, 214 insertions(+), 2 deletions(-) > > >>>>>> create mode 100644 xen/arch/arm/arm32/vfp.c > > >>>>>> create mode 100644 xen/arch/arm/arm64/vfp.c > > >>>>>> create mode 100644 xen/include/asm-arm/arm32/vfp.h > > >>>>>> create mode 100644 xen/include/asm-arm/arm64/vfp.h > > >>>>>> create mode 100644 xen/include/asm-arm/vfp.h > > >>>>>> > > >>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile > > >>>>>> index aaf277a..b903803 100644 > > >>>>>> --- a/xen/arch/arm/arm32/Makefile > > >>>>>> +++ b/xen/arch/arm/arm32/Makefile > > >>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o > > >>>>>> > > >>>>>> obj-y += traps.o > > >>>>>> obj-y += domain.o > > >>>>>> +obj-y += vfp.o > > >>>>>> > > >>>>>> obj-$(EARLY_PRINTK) += debug.o > > >>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c > > >>>>>> new file mode 100644 > > >>>>>> index 0000000..2ece43d > > >>>>>> --- /dev/null > > >>>>>> +++ b/xen/arch/arm/arm32/vfp.c > > >>>>>> @@ -0,0 +1,99 @@ > > >>>>>> +#include <xen/sched.h> > > >>>>>> +#include <xen/init.h> > > >>>>>> +#include <asm/processor.h> > > >>>>>> +#include <asm/vfp.h> > > >>>>>> + > > >>>>>> +void vfp_save_state(struct vcpu *v) > > >>>>>> +{ > > >>>>>> + v->arch.vfp.fpexc = READ_CP32(FPEXC); > > >>>>>> + > > >>>>>> + WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC); > > >>>>>> + > > >>>>>> + v->arch.vfp.fpscr = READ_CP32(FPSCR); > > >>>>>> + > > >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */ > > >>>>>> + { > > >>>>>> + v->arch.vfp.fpinst = READ_CP32(FPINST); > > >>>>>> + > > >>>>>> + if ( v->arch.vfp.fpexc & FPEXC_FP2V ) > > >>>>>> + v->arch.vfp.fpinst2 = READ_CP32(FPINST2); > > >>>>>> + /* Disable FPEXC_EX */ > > >>>>>> + WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC); > > >>>>>> + } > > >>>>>> + > > >>>>>> + /* Save {d0-d15} */ > > >>>>>> + asm volatile("stc p11, cr0, [%0], #32*4" > > >>>>>> + : : "r" (v->arch.vfp.fpregs1) > > >>>>>> + : "memory"); > > >>>>> > > >>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that > > >>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do > > >>>>> the job without clobbering the whole of memory. > > >>>> > > >>>> I'm not sure to fully understand the concept behind "=m". Does it mean > > >>>> that gcc will clobber all the memory range addressed by fpregs? > > >>> > > >>> I'm not totally confident in this stuff myself.... > > >>> > > >>> Apparently the "=" modified means[0] "this operand is write-only for > > >>> this instruction: the previous value is discarded and replaced by output > > >>> data." In the case of a memory constraint you'd want to hope that > > >>> "discarded and replaced" would be equivalent to clobbering the address, > > >>> at least in cases where the compiler knows the size of the thing, as it > > >>> does here. > > >>> > > >>> Ian. > > >>> > > >>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers > > >>> > > >> > > >> > > >> Shall I use this contrainst for the stc instructions and send a new > > >> patch series? > > > > > > If you buy my reasoning and if it works then I guess so. > > > > > > Hi, > > > > So I gave a try and it doesn't work. > > That's a shame, oh well we'll have to use the big clobber then -- not > the end of the world. > > > The modifiers =m replaces %0 by [rn,imm] which is wrong. So it's not > > possible to specify the amount of data which the instruction need to > > read (the #32*4) because pre-index and post-index aren't allowed in a > > same instruction.I had a chat with Tim and he pointed out that this is exactly what the =Q output constraint is for. According to http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Machine-Constraints.html#Machine-Constraints it is: A memory reference where the exact address is in a single register In other words it outlaws the [rn,imm] construct. Does that work? (that doc goes on to say: (`‘m’' is preferable for asm statements) lets ignore that ;-)) Ian. _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel