Will Schmidt
2014-Jun-24 21:43 UTC
[LLVMdev] [1/5 PATCH/RFC PPC64] add power8 keyword target to llvm
Add pwr8/keyword, and initial P8 tablegen descriptor table. diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index bd58539..6badc2f 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "" def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; +def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model, FeaturePOPCNTD, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr8", P8Model, + [DirectivePwr8, FeatureAltivec, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 1221d41..a5cc4e7 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -118,6 +118,7 @@ include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" include "PPCScheduleP7.td" +include "PPCScheduleP8.td" include "PPCScheduleA2.td" include "PPCScheduleE500mc.td" include "PPCScheduleE5500.td" diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td new file mode 100644 index 0000000..c4b918f --- /dev/null 
+++ b/lib/Target/PowerPC/PPCScheduleP8.td @@ -0,0 +1,389 @@ +//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the POWER7 processor. +// +//===----------------------------------------------------------------------===// + +// XXX FIXME. +// this is a blind copy of P7 Schedule and s/P7/P8/g . Details within will need to be updated with the P8 specifics. + + +// Primary reference: +// IBM POWER7 multicore server processor +// B. Sinharoy, et al. +// IBM J. Res. & Dev. (55) 3. May/June 2011. + +// Scheduling for the P8 involves tracking two types of resources: +// 1. The dispatch bundle slots +// 2. The functional unit resources + +// Dispatch units: +def P8_DU1 : FuncUnit; +def P8_DU2 : FuncUnit; +def P8_DU3 : FuncUnit; +def P8_DU4 : FuncUnit; +def P8_DU5 : FuncUnit; +def P8_DU6 : FuncUnit; + +def P8_LS1 : FuncUnit; // Load/Store pipeline 1 +def P8_LS2 : FuncUnit; // Load/Store pipeline 2 + +def P8_FX1 : FuncUnit; // FX pipeline 1 +def P8_FX2 : FuncUnit; // FX pipeline 2 + +// VS pipeline 1 (vector integer ops. always here) +def P8_VS1 : FuncUnit; // VS pipeline 1 +// VS pipeline 2 (128-bit stores and perms. here) +def P8_VS2 : FuncUnit; // VS pipeline 2 + +def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) +def P8_BRU : FuncUnit; // BR unit + +// Notes: +// Each LSU pipeline can also execute FX add and logical instructions. +// Each LSU pipeline can complete a load or store in one cycle. +// +// Each store is broken into two parts, AGEN goes to the LSU while a +// "data steering" op. goes to the FXU or VSU. +// +// FX loads have a two cycle load-to-use latency (so one "bubble" cycle). 
+// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles). +// +// Frequent FX ops. take only one cycle and results can be used again in the +// next cycle (there is a self-bypass). Getting results from the other FX +// pipeline takes an additional cycle. +// +// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles +// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops. +// Dispatch of an instruction to VS1 that uses four single prec. inputs +// (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any +// floating point instruction. +// +// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles +// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline +// (unlike on the POWER6). +// +// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP +// share the same write-back, and have a 5-cycle latency difference, so the +// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP +// op. has been dispatched to VS1. +// +// Three cycles after an L1 cache hit, a dependent VSU instruction can issue. +// +// Instruction dispatch groups have (at most) four non-branch instructions, and +// two branches. Unlike on the POWER4/5, a branch does not automatically +// end the dispatch group, but a second branch must be the last in the group. 
+ +def P8Itineraries : ProcessorItineraries< + [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, + P8_LS1, P8_LS2, P8_FX1, P8_FX2, P8_VS1, P8_VS2, P8_CRU, P8_BRU], [], [ + InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2, + P8_LS1, P8_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1, 1]>, + // FIXME: Add record-form itinerary data. + InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<36, [P8_FX1, P8_FX2]>], + [36, 1, 1]>, + InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<68, [P8_FX1, P8_FX2]>], + [68, 1, 1]>, + InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [4, 1, 1]>, + InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1]>, + InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_FX1, 
P8_FX2]>], + [1, 1]>, + InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_BRU]>], + [3, 1, 1]>, + InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_CRU]>], + [3, 1, 1]>, + InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_BRU]>], + [3, 1, 1]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_BRU]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [2, 2, 1, 1]>, + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [2, 2, 1, 1]>, + InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + 
InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 3, 1, 1]>, + InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [4, 4, 1, 1]>, + InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [4, 4, 1, 1]>, + InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [3, 1, 1]>, + InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [2, 1, 1]>, + InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, 
[P8_FX1, P8_FX2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_FX1, P8_FX2], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [2, 1, 1, 1]>, + InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2], 0>, + InstrStage<1, [P8_VS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_LS1, P8_LS2]>], + [1, 1, 1]>, + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_DU2], 0>, + InstrStage<1, [P8_DU3], 0>, + InstrStage<1, [P8_DU4], 0>, + InstrStage<1, [P8_CRU]>, + InstrStage<1, [P8_FX1, P8_FX2]>], + [3, 1]>, // mtcr + InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_CRU]>], + [6, 1]>, + InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_CRU]>], + [3, 1]>, + InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, 
[P8_FX1]>], + [4, 1]>, // mtctr + InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [8, 1, 1]>, + InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [33, 1, 1]>, + InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [27, 1, 1]>, + InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [44, 1, 1]>, + InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [32, 1, 1]>, + InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [5, 1, 1, 1]>, + InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, + P8_DU3, P8_DU4], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [5, 1, 1]>, + InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1]>], + [2, 1, 1]>, + InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1, P8_VS2]>], + [6, 1, 1]>, + InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>, + InstrStage<1, [P8_VS1]>], + [7, 1, 1]>, + InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>, + InstrStage<1, [P8_VS2]>], + [3, 1, 1]> +]>; + +// 
===---------------------------------------------------------------------===// +// P8 machine model for scheduling and other instruction cost heuristics. + +def P8Model : SchedMachineModel { + let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle. + // Note that the dispatch bundle size is 6 (including + // branches), but the total internal issue bandwidth per + // cycle (from all queues) is 8. + + let MinLatency = 0; // Out-of-order dispatch. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 16; + + let Itineraries = P8Itineraries; +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 8aafa99..a3a7480 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -56,6 +56,7 @@ namespace PPC { DIR_PWR6, DIR_PWR6X, DIR_PWR7, + DIR_PWR8, DIR_64 }; }
Eric Christopher
2014-Jun-24 22:13 UTC
[LLVMdev] [cfe-dev] [1/5 PATCH/RFC PPC64] add power8 keyword target to llvm
Two things: a) this didn't need to go to cfe-dev. b) the XXX Fixme: It seems like you might as well just say it's a P7 in the processor description rather than copying a known "not the same" model. The PPCSubtarget.h and PPC.td files look fine though. -eric On Tue, Jun 24, 2014 at 2:43 PM, Will Schmidt <will_schmidt at vnet.ibm.com> wrote:> Add pwr8/keyword, and initial P8 tablegen descriptor table. > > > > diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td > index bd58539..6badc2f 100644 > --- a/lib/Target/PowerPC/PPC.td > +++ b/lib/Target/PowerPC/PPC.td > @@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "" > def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; > def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; > def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; > +def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; > > def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", > "Enable 64-bit instructions">; > @@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model, > FeaturePOPCNTD, FeatureLDBRX, > Feature64Bit /*, Feature64BitRegs */, > DeprecatedMFTB, DeprecatedDST]>; > +def : ProcessorModel<"pwr8", P8Model, > + [DirectivePwr8, FeatureAltivec, > + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, > + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, > + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, > + FeatureFPRND, FeatureFPCVT, FeatureISEL, > + FeaturePOPCNTD, FeatureLDBRX, > + Feature64Bit /*, Feature64BitRegs */, > + DeprecatedMFTB, DeprecatedDST]>; > def : Processor<"ppc", G3Itineraries, [Directive32]>; > def : ProcessorModel<"ppc64", G5Model, > [Directive64, FeatureAltivec, > diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td > index 1221d41..a5cc4e7 100644 > --- a/lib/Target/PowerPC/PPCSchedule.td > +++ 
b/lib/Target/PowerPC/PPCSchedule.td > @@ -118,6 +118,7 @@ include "PPCScheduleG4.td" > include "PPCScheduleG4Plus.td" > include "PPCScheduleG5.td" > include "PPCScheduleP7.td" > +include "PPCScheduleP8.td" > include "PPCScheduleA2.td" > include "PPCScheduleE500mc.td" > include "PPCScheduleE5500.td" > diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td > new file mode 100644 > index 0000000..c4b918f > --- /dev/null > +++ b/lib/Target/PowerPC/PPCScheduleP8.td > @@ -0,0 +1,389 @@ > +//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===----------------------------------------------------------------------===// > +// > +// This file defines the itinerary class data for the POWER7 processor. > +// > +//===----------------------------------------------------------------------===// > + > +// XXX FIXME. > +// this is a blind copy of P7 Schedule and s/P7/P8/g . Details within will need to be updated with the P8 specifics. > + > + > +// Primary reference: > +// IBM POWER7 multicore server processor > +// B. Sinharoy, et al. > +// IBM J. Res. & Dev. (55) 3. May/June 2011. > + > +// Scheduling for the P8 involves tracking two types of resources: > +// 1. The dispatch bundle slots > +// 2. The functional unit resources > + > +// Dispatch units: > +def P8_DU1 : FuncUnit; > +def P8_DU2 : FuncUnit; > +def P8_DU3 : FuncUnit; > +def P8_DU4 : FuncUnit; > +def P8_DU5 : FuncUnit; > +def P8_DU6 : FuncUnit; > + > +def P8_LS1 : FuncUnit; // Load/Store pipeline 1 > +def P8_LS2 : FuncUnit; // Load/Store pipeline 2 > + > +def P8_FX1 : FuncUnit; // FX pipeline 1 > +def P8_FX2 : FuncUnit; // FX pipeline 2 > + > +// VS pipeline 1 (vector integer ops. always here) > +def P8_VS1 : FuncUnit; // VS pipeline 1 > +// VS pipeline 2 (128-bit stores and perms. 
here) > +def P8_VS2 : FuncUnit; // VS pipeline 2 > + > +def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) > +def P8_BRU : FuncUnit; // BR unit > + > +// Notes: > +// Each LSU pipeline can also execute FX add and logical instructions. > +// Each LSU pipeline can complete a load or store in one cycle. > +// > +// Each store is broken into two parts, AGEN goes to the LSU while a > +// "data steering" op. goes to the FXU or VSU. > +// > +// FX loads have a two cycle load-to-use latency (so one "bubble" cycle). > +// VSU loads have a three cycle load-to-use latency (so two "bubble" cycles). > +// > +// Frequent FX ops. take only one cycle and results can be used again in the > +// next cycle (there is a self-bypass). Getting results from the other FX > +// pipeline takes an additional cycle. > +// > +// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles > +// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops. > +// Dispatch of an instruction to VS1 that uses four single prec. inputs > +// (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any > +// floating point instruction. > +// > +// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles > +// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline > +// (unlike on the POWER6). > +// > +// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP > +// share the same write-back, and have a 5-cycle latency difference, so the > +// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP > +// op. has been dispatched to VS1. > +// > +// Three cycles after an L1 cache hit, a dependent VSU instruction can issue. > +// > +// Instruction dispatch groups have (at most) four non-branch instructions, and > +// two branches. Unlike on the POWER4/5, a branch does not automatically > +// end the dispatch group, but a second branch must be the last in the group. 
> + > +def P8Itineraries : ProcessorItineraries< > + [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, > + P8_LS1, P8_LS2, P8_FX1, P8_FX2, P8_VS1, P8_VS2, P8_CRU, P8_BRU], [], [ > + InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2, > + P8_LS1, P8_LS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + // FIXME: Add record-form itinerary data. > + InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<36, [P8_FX1, P8_FX2]>], > + [36, 1, 1]>, > + InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<68, [P8_FX1, P8_FX2]>], > + [68, 1, 1]>, > + InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 1, 1]>, > + InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 1, 1]>, > + InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 1, 1]>, > + InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1]>, > + 
InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1]>, > + InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU5, P8_DU6], 0>, > + InstrStage<1, [P8_BRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_CRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU5, P8_DU6], 0>, > + InstrStage<1, [P8_BRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU5, P8_DU6], 0>, > + InstrStage<1, [P8_BRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [2, 1, 1]>, > + InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 2, 1, 1]>, > + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [2, 1, 1]>, > + InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 2, 1, 1]>, > + InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + 
InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 4, 1, 1]>, > + InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 4, 1, 1]>, > + InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, 
P8_LS2]>], > + [2, 1, 1]>, > + InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 1, 1, 1]>, > + InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 1, 1, 1]>, > + InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [2, 1, 1, 1]>, > + InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_VS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>, > + 
InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_CRU]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 1]>, // mtcr > + InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_CRU]>], > + [6, 1]>, > + InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_CRU]>], > + [3, 1]>, > + InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_FX1]>], > + [4, 1]>, // mtctr > + InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [5, 1, 1]>, > + InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [8, 1, 1]>, > + InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [33, 1, 1]>, > + InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [27, 1, 1]>, > + InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [44, 1, 1]>, > + InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [32, 1, 1]>, > + InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [5, 1, 1, 1]>, > + InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [5, 1, 1]>, > + InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [2, 1, 1]>, > + InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [2, 1, 1]>, > + InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [2, 1, 1]>, > + 
InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [6, 1, 1]>, > + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [6, 1, 1]>, > + InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [6, 1, 1]>, > + InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [7, 1, 1]>, > + InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>, > + InstrStage<1, [P8_VS2]>], > + [3, 1, 1]> > +]>; > + > +// ===---------------------------------------------------------------------===// > +// P8 machine model for scheduling and other instruction cost heuristics. > + > +def P8Model : SchedMachineModel { > + let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle. > + // Note that the dispatch bundle size is 6 (including > + // branches), but the total internal issue bandwidth per > + // cycle (from all queues) is 8. > + > + let MinLatency = 0; // Out-of-order dispatch. > + let LoadLatency = 3; // Optimistic load latency assuming bypass. > + // This is overridden by OperandCycles if the > + // Itineraries are queried instead. > + let MispredictPenalty = 16; > + > + let Itineraries = P8Itineraries; > +} > + > diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h > index 8aafa99..a3a7480 100644 > --- a/lib/Target/PowerPC/PPCSubtarget.h > +++ b/lib/Target/PowerPC/PPCSubtarget.h > @@ -56,6 +56,7 @@ namespace PPC { > DIR_PWR6, > DIR_PWR6X, > DIR_PWR7, > + DIR_PWR8, > DIR_64 > }; > } > > > _______________________________________________ > cfe-dev mailing list > cfe-dev at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev