Mark Langsdorf
2007-Nov-02 21:35 UTC
[Xen-devel] [cpfreq][PATCH][2/2] Linux support for the architectural pstate driver
With the third generation Opteron parts, AMD switched to an architecturally defined interface for PowerNow! that uses different MSRs than previous versions. This patch brings the PowerNow! driver up to match the mainline Linux driver and provide support for all AMD parts that use or will use the architectural pstate interface. It also removes a WARN_ON statement in kernel/cpu.c that highlights a cpu hotplug locking issue in the ondemand cpufreq governor. It is only a warning message and the scope of the changes to properly surpress it is a bit large to add to the Xen 2.6.18.8 kernel. I will backport them if people think that''s a better idea. Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com> diff -r d827dfc6593e arch/i386/kernel/cpu/cpufreq/powernow-k8.c --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Thu Nov 01 09:07:45 2007 -0600 +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Fri Nov 02 16:41:22 2007 -0500 @@ -46,7 +46,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 2.00.00" +#define VERSION "version 2.20.00" #include "powernow-k8.h" /* serialize freq changes */ @@ -66,36 +66,15 @@ static u32 find_freq_from_fid(u32 fid) return 800 + (fid * 100); } - /* Return a frequency in KHz, given an input fid */ static u32 find_khz_freq_from_fid(u32 fid) { return 1000 * find_freq_from_fid(fid); } -/* Return a frequency in MHz, given an input fid and did */ -static u32 find_freq_from_fiddid(u32 fid, u32 did) -{ - return 100 * (fid + 0x10) >> did; -} - -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) -{ - return 1000 * find_freq_from_fiddid(fid, did); -} - -static u32 find_fid_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return lo & HW_PSTATE_FID_MASK; -} - -static u32 find_did_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; +static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 pstate) +{ + return data[pstate].frequency; } /* Return the vco fid for an input fid @@ -139,9 +118,7 @@ static int query_current_values_with_pen if (cpu_family == CPU_HW_PSTATE) { rdmsr(MSR_PSTATE_STATUS, lo, hi); i = lo & HW_PSTATE_MASK; - rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); - data->currfid = lo & HW_PSTATE_FID_MASK; - data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + data->currpstate = i; return 0; } do { @@ -292,7 +269,7 @@ static int transition_pstate(struct powe static int transition_pstate(struct powernow_k8_data *data, u32 pstate) { wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currfid = find_fid_from_pstate(pstate); + data->currpstate = pstate; return 0; } @@ -882,41 +859,23 @@ static int fill_powernow_table_pstate(st static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) { int i; + u32 hi = 0, lo = 0; + rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); + data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; for (i = 0; i < data->acpi_data->state_count; i++) { u32 index; - u32 hi = 0, lo = 0; - u32 fid; - u32 did; index = data->acpi_data->states[i].control & HW_PSTATE_MASK; - if (index > MAX_HW_PSTATE) { + if (index > data->max_hw_pstate) { printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); - } - rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); - if (!(hi & HW_PSTATE_VALID_MASK)) { - dprintk("invalid pstate %d, ignoring\n", index); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; continue; } - fid = lo & HW_PSTATE_FID_MASK; - did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; - - dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); - - powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); - - powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - - if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } + powernow_table[i].index = index; + powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000; + } return 0; } @@ -1058,22 +1017,18 @@ static int transition_frequency_fidvid(s /* Take a frequency, and issue the hardware pstate transition command */ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) { - u32 fid = 0; - u32 did = 0; u32 pstate = 0; int res, i; struct cpufreq_freqs freqs; dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - /* get fid did for hardware pstate transition */ + /* get MSR index for hardware pstate transition */ pstate = index & HW_PSTATE_MASK; - if (pstate > MAX_HW_PSTATE) + if (pstate > data->max_hw_pstate) return 0; - fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; - did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; - freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); - freqs.new = find_khz_freq_from_fiddid(fid, did); + freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1081,9 +1036,7 @@ static int transition_frequency_pstate(s } res = transition_pstate(data, pstate); - data->currfid = find_fid_from_pstate(pstate); - data->currdid = find_did_from_pstate(pstate); - freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); for_each_cpu_mask(i, *(data->available_cores)) { freqs.cpu = i; @@ -1128,10 +1081,7 @@ static int powernowk8_target(struct cpuf if (query_current_values_with_pending_wait(data)) goto err_out; - if (cpu_family == CPU_HW_PSTATE) - dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currvid); - else { + if (cpu_family != CPU_HW_PSTATE) { dprintk("targ: curr fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1162,7 +1112,7 @@ static int powernowk8_target(struct cpuf mutex_unlock(&fidvid_mutex); if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, newstate); else pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1259,7 +1209,7 @@ static int __cpuinit powernowk8_cpu_init + (3 * (1 << data->irt) * 10)) * 1000; if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else pol->cur = find_khz_freq_from_fid(data->currfid); dprintk("policy current frequency %d kHz\n", pol->cur); @@ -1276,8 +1226,7 @@ static int __cpuinit powernowk8_cpu_init cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); + dprintk("cpu_init done, current pstate 0x%x\n", data->currpstate); else dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); @@ -1332,7 +1281,10 @@ static unsigned int powernowk8_get (unsi if (query_current_values_with_pending_wait(data)) goto out; - khz = find_khz_freq_from_fid(data->currfid); + if (cpu_family == CPU_HW_PSTATE) + khz = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); + else + khz = find_khz_freq_from_fid(data->currfid); out: set_cpus_allowed(current, oldmask); diff -r d827dfc6593e kernel/cpu.c --- a/kernel/cpu.c Thu Nov 01 09:07:45 2007 -0600 +++ b/kernel/cpu.c Fri Nov 02 16:41:22 2007 -0500 @@ -48,7 +48,10 @@ EXPORT_SYMBOL_GPL(lock_cpu_hotplug); void unlock_cpu_hotplug(void) { +/* cpufreq lock-takers fixed in mainline; shut up until dom0 kernel catches up*/ +#ifdef CONFIG_XEN WARN_ON(recursive != current); +#endif if (recursive_depth) { recursive_depth--; return; _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel