Jan Beulich
2007-Oct-29 11:46 UTC
[Xen-devel] [PATCH] x86: allow pv guests to disable TSC for applications
Linux, under CONFIG_SECCOMP, has been capable of hiding the TSC from processes for quite a while. This patch enables this to actually work for pv kernels, by allowing them to control CR4.TSD (and, as a simple thing to do at the same time, CR4.DE). Applies cleanly only on top of the previously submitted debug register handling patch. Signed-off-by: Jan Beulich <jbeulich@novell.com> Index: 2007-10-10/xen/arch/x86/acpi/power.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/acpi/power.c 2007-10-29 11:19:58.000000000 +0100 +++ 2007-10-10/xen/arch/x86/acpi/power.c 2007-10-26 15:08:38.000000000 +0200 @@ -155,6 +155,8 @@ static int enter_state(u32 state) pmprintk(XENLOG_DEBUG, "Back to C."); + write_cr4(idle_vcpu[smp_processor_id()]->arch.cr4); + device_power_up(); pmprintk(XENLOG_INFO, "Finishing wakeup from ACPI S%d state.", state); Index: 2007-10-10/xen/arch/x86/domain.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/domain.c 2007-10-26 16:47:52.000000000 +0200 +++ 2007-10-10/xen/arch/x86/domain.c 2007-10-29 11:20:35.000000000 +0100 @@ -413,6 +413,8 @@ int vcpu_initialise(struct vcpu *v) v->arch.schedule_tail = continue_idle_domain; v->arch.cr3 = __pa(idle_pg_table); } + else + v->arch.cr4 = mmu_cr4_features; } v->arch.perdomain_ptes @@ -1195,6 +1197,12 @@ static void paravirt_ctxt_switch_to(stru set_int80_direct_trap(v); switch_kernel_stack(v); + if ( unlikely(idle_vcpu[v->processor]->arch.cr4 != v->arch.cr4) ) + { + idle_vcpu[v->processor]->arch.cr4 = v->arch.cr4; + write_cr4(v->arch.cr4); + } + /* Maybe switch the debug registers. 
*/ cond_loaddebug(v, 0); cond_loaddebug(v, 1); Index: 2007-10-10/xen/arch/x86/flushtlb.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/flushtlb.c 2007-10-29 11:19:58.000000000 +0100 +++ 2007-10-10/xen/arch/x86/flushtlb.c 2007-10-29 12:03:23.000000000 +0100 @@ -23,6 +23,19 @@ u32 tlbflush_clock = 1U; DEFINE_PER_CPU(u32, tlbflush_time); +static inline void __pge_off(void) +{ + write_cr4(mmu_cr4_features & ~X86_CR4_PGE); +} + +static inline void __pge_on(void) +{ + struct vcpu *idle = idle_vcpu[smp_processor_id()]; + + write_cr4(likely(idle != NULL) && likely(idle != INVALID_VCPU) && + likely(idle->arch.cr4) ? idle->arch.cr4 : mmu_cr4_features); +} + /* * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value. * Index: 2007-10-10/xen/arch/x86/hvm/vmx/vmcs.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/hvm/vmx/vmcs.c 2007-10-29 11:19:58.000000000 +0100 +++ 2007-10-10/xen/arch/x86/hvm/vmx/vmcs.c 2007-10-26 14:35:40.000000000 +0200 @@ -498,7 +498,7 @@ static int construct_vmcs(struct vcpu *v /* Host control registers. */ __vmwrite(HOST_CR0, read_cr0() | X86_CR0_TS); - __vmwrite(HOST_CR4, read_cr4()); + __vmwrite(HOST_CR4, mmu_cr4_features); /* Host CS:RIP. 
*/ __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS); Index: 2007-10-10/xen/arch/x86/hvm/vmx/vmx.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/hvm/vmx/vmx.c 2007-10-29 09:01:31.000000000 +0100 +++ 2007-10-10/xen/arch/x86/hvm/vmx/vmx.c 2007-10-29 09:10:52.000000000 +0100 @@ -728,6 +728,7 @@ static void vmx_ctxt_switch_from(struct static void vmx_ctxt_switch_to(struct vcpu *v) { + idle_vcpu[v->processor]->arch.cr4 = mmu_cr4_features; vmx_restore_guest_msrs(v); vmx_restore_dr(v); } Index: 2007-10-10/xen/arch/x86/setup.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/setup.c 2007-10-29 11:19:58.000000000 +0100 +++ 2007-10-10/xen/arch/x86/setup.c 2007-10-29 12:00:01.000000000 +0100 @@ -412,7 +412,7 @@ void __init __start_xen(unsigned long mb parse_video_info(); - set_current((struct vcpu *)0xfffff000); /* debug sanity */ + set_current(INVALID_VCPU); /* debug sanity */ idle_vcpu[0] = current; set_processor_id(0); /* needed early, for smp_processor_id() */ Index: 2007-10-10/xen/arch/x86/traps.c ==================================================================--- 2007-10-10.orig/xen/arch/x86/traps.c 2007-10-26 15:53:13.000000000 +0200 +++ 2007-10-10/xen/arch/x86/traps.c 2007-10-26 16:55:03.000000000 +0200 @@ -1717,10 +1717,24 @@ static int emulate_privileged_op(struct break; case 4: /* Write CR4 */ - if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) ) - gdprintk(XENLOG_WARNING, - "Attempt to change CR4 flags %08lx -> %08lx\n", - read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE), *reg); + res = read_cr4(); + if ( *reg != (res & ~(X86_CR4_PGE|X86_CR4_PSE)) ) + { + unsigned long hv_cr4_mask = ~X86_CR4_TSD; + + if ( cpu_has_de && IS_PRIV(v->domain) ) + hv_cr4_mask &= ~X86_CR4_DE; + if ( (*reg & hv_cr4_mask) != + (res & hv_cr4_mask & ~(X86_CR4_PGE|X86_CR4_PSE)) ) + gdprintk(XENLOG_WARNING, + "Attempt to change CR4 flags %08lx -> %08lx\n", + res & ~(X86_CR4_PGE|X86_CR4_PSE), *reg); + 
res &= hv_cr4_mask; + res |= *reg & ~hv_cr4_mask; + v->arch.cr4 = res; + idle_vcpu[v->processor]->arch.cr4 = v->arch.cr4; + write_cr4(res); + } break; default: @@ -1785,6 +1799,10 @@ static int emulate_privileged_op(struct } break; + case 0x31: /* RDTSC */ + rdtsc(regs->eax, regs->edx); + break; + case 0x32: /* RDMSR */ switch ( regs->ecx ) { Index: 2007-10-10/xen/include/asm-x86/domain.h ==================================================================--- 2007-10-10.orig/xen/include/asm-x86/domain.h 2007-10-29 11:19:58.000000000 +0100 +++ 2007-10-10/xen/include/asm-x86/domain.h 2007-10-26 17:14:51.000000000 +0200 @@ -18,6 +18,8 @@ #define is_pv_32on64_vcpu(v) (is_pv_32on64_domain((v)->domain)) #define IS_COMPAT(d) (is_pv_32on64_domain(d)) +#define INVALID_VCPU ((struct vcpu *)0xfffff000) + struct trap_bounce { uint32_t error_code; uint8_t flags; /* TBF_ */ @@ -321,6 +323,7 @@ struct arch_vcpu pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ pagetable_t monitor_table; /* (MFN) hypervisor PT (for HVM) */ unsigned long cr3; /* (MA) value to install in HW CR3 */ + unsigned long cr4; /* guest-customized CR4 value */ /* Current LDT details. */ unsigned long shadow_ldt_mapcnt; Index: 2007-10-10/xen/include/asm-x86/page.h ==================================================================--- 2007-10-10.orig/xen/include/asm-x86/page.h 2007-10-29 11:19:58.000000000 +0100 +++ 2007-10-10/xen/include/asm-x86/page.h 2007-10-26 14:21:48.000000000 +0200 @@ -294,9 +294,6 @@ void paging_init(void); void setup_idle_pagetable(void); #endif /* !defined(__ASSEMBLY__) */ -#define __pge_off() write_cr4(mmu_cr4_features & ~X86_CR4_PGE) -#define __pge_on() write_cr4(mmu_cr4_features) - #define _PAGE_PRESENT 0x001U #define _PAGE_RW 0x002U #define _PAGE_USER 0x004U _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel