Will Schmidt
2014-Jun-24  21:43 UTC
[LLVMdev] [1/5 PATCH/RFC PPC64] add power8 keyword target to llvm
Add the pwr8 keyword and an initial P8 tablegen descriptor table.
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index bd58539..6badc2f 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"",
"DarwinDirective", "PPC::DIR_PWR5X", ""
 def DirectivePwr6: SubtargetFeature<"",
"DarwinDirective", "PPC::DIR_PWR6", "">;
 def DirectivePwr6x: SubtargetFeature<"",
"DarwinDirective", "PPC::DIR_PWR6X", "">;
 def DirectivePwr7: SubtargetFeature<"",
"DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"",
"DarwinDirective", "PPC::DIR_PWR8", "">;
 
 def Feature64Bit     :
SubtargetFeature<"64bit","Has64BitSupport",
"true",
                                         "Enable 64-bit
instructions">;
@@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model,
                    FeaturePOPCNTD, FeatureLDBRX,
                    Feature64Bit /*, Feature64BitRegs */,
                    DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr8", P8Model,
+                  [DirectivePwr8, FeatureAltivec,
+                   FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+                   FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+                   FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+                   FeatureFPRND, FeatureFPCVT, FeatureISEL,
+                   FeaturePOPCNTD, FeatureLDBRX,
+                   Feature64Bit /*, Feature64BitRegs */,
+                   DeprecatedMFTB, DeprecatedDST]>;
 def : Processor<"ppc", G3Itineraries, [Directive32]>;
 def : ProcessorModel<"ppc64", G5Model,
                   [Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCSchedule.td
b/lib/Target/PowerPC/PPCSchedule.td
index 1221d41..a5cc4e7 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -118,6 +118,7 @@ include "PPCScheduleG4.td"
 include "PPCScheduleG4Plus.td"
 include "PPCScheduleG5.td"
 include "PPCScheduleP7.td"
+include "PPCScheduleP8.td"
 include "PPCScheduleA2.td"
 include "PPCScheduleE500mc.td"
 include "PPCScheduleE5500.td"
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td
b/lib/Target/PowerPC/PPCScheduleP8.td
new file mode 100644
index 0000000..c4b918f
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -0,0 +1,389 @@
+//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen
-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER8 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// XXX FIXME.
+//  This is a blind copy of the P7 schedule with s/P7/P8/g applied. Details within will
need to be updated with the P8 specifics.
+
+
+// Primary reference:
+// IBM POWER7 multicore server processor
+// B. Sinharoy, et al.
+// IBM J. Res. & Dev. (55) 3. May/June 2011.
+
+// Scheduling for the P8 involves tracking two types of resources:
+//  1. The dispatch bundle slots
+//  2. The functional unit resources
+
+// Dispatch units:
+def P8_DU1    : FuncUnit;
+def P8_DU2    : FuncUnit;
+def P8_DU3    : FuncUnit;
+def P8_DU4    : FuncUnit;
+def P8_DU5    : FuncUnit;
+def P8_DU6    : FuncUnit;
+
+def P8_LS1    : FuncUnit; // Load/Store pipeline 1
+def P8_LS2    : FuncUnit; // Load/Store pipeline 2
+
+def P8_FX1    : FuncUnit; // FX pipeline 1
+def P8_FX2    : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer ops. always here)
+def P8_VS1    : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and perms. here)
+def P8_VS2    : FuncUnit; // VS pipeline 2
+
+def P8_CRU    : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P8_BRU    : FuncUnit; // BR unit
+
+// Notes:
+// Each LSU pipeline can also execute FX add and logical instructions.
+// Each LSU pipeline can complete a load or store in one cycle.
+//
+// Each store is broken into two parts, AGEN goes to the LSU while a
+// "data steering" op. goes to the FXU or VSU.
+//
+// FX loads have a two cycle load-to-use latency (so one "bubble"
cycle).
+// VSU loads have a three cycle load-to-use latency (so two "bubble"
cycles).
+//
+// Frequent FX ops. take only one cycle and results can be used again in the
+// next cycle (there is a self-bypass). Getting results from the other FX
+// pipeline takes an additional cycle.
+//
+// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
+// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
+// Dispatch of an instruction to VS1 that uses four single prec. inputs
+// (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any
+// floating point instruction.
+//
+// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
+// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
+// (unlike on the POWER6).
+//
+// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
+// share the same write-back, and have a 5-cycle latency difference, so the
+// IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
+// op. has been dispatched to VS1.
+//
+// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
+//
+// Instruction dispatch groups have (at most) four non-branch instructions, and
+// two branches. Unlike on the POWER4/5, a branch does not automatically
+// end the dispatch group, but a second branch must be the last in the group.
+
+def P8Itineraries : ProcessorItineraries<
+  [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6,
+   P8_LS1, P8_LS2, P8_FX1, P8_FX2, P8_VS1, P8_VS2, P8_CRU, P8_BRU], [], [
+  InstrItinData<IIC_IntSimple   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2,
+                                                  P8_LS1, P8_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntGeneral  , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntCompare  , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  // FIXME: Add record-form itinerary data.
+  InstrItinData<IIC_IntDivW     , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<36, [P8_FX1, P8_FX2]>],
+                                  [36, 1, 1]>,
+  InstrItinData<IIC_IntDivD     , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<68, [P8_FX1, P8_FX2]>],
+                                  [68, 1, 1]>,
+  InstrItinData<IIC_IntMulHW    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData<IIC_IntMulHWU   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData<IIC_IntMulLI    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData<IIC_IntRotate   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                   [1, 1, 1]>,
+  InstrItinData<IIC_IntRotateD  , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                   [1, 1, 1]>,
+  InstrItinData<IIC_IntShift    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_IntTrapW    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1]>,
+  InstrItinData<IIC_IntTrapD    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1]>,
+  InstrItinData<IIC_BrB         , [InstrStage<1, [P8_DU5, P8_DU6], 0>,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_BrCR        , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_CRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_BrMCR       , [InstrStage<1, [P8_DU5, P8_DU6], 0>,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU5, P8_DU6], 0>,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLoad    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 2, 1, 1]>,
+  InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData<IIC_LdStLD      , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_LdStLDU     , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 2, 1, 1]>,
+  InstrItinData<IIC_LdStLDUX    , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData<IIC_LdStLFD     , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLVecX   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLFDU    , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData<IIC_LdStLFDUX   , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData<IIC_LdStLHA     , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLHAU    , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 4, 1, 1]>,
+  InstrItinData<IIC_LdStLHAUX   , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 4, 1, 1]>,
+  InstrItinData<IIC_LdStLWA     , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLWARX,    [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLDARX,    [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData<IIC_LdStLMW     , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_LdStStore   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTD     , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTDU    , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 1, 1, 1]>,
+  InstrItinData<IIC_LdStSTDUX   , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 1, 1, 1]>,
+  InstrItinData<IIC_LdStSTFD    , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTFDU   , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [2, 1, 1, 1]>,
+  InstrItinData<IIC_LdStSTVEBX  , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_VS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTDCX   , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_LdStSTWCX   , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData<IIC_BrMCRX      , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_CRU]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 1]>, // mtcr
+  InstrItinData<IIC_SprMFCR     , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_CRU]>],
+                                  [6, 1]>,
+  InstrItinData<IIC_SprMFCRF    , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_CRU]>],
+                                  [3, 1]>,
+  InstrItinData<IIC_SprMTSPR    , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_FX1]>],
+                                  [4, 1]>, // mtctr
+  InstrItinData<IIC_FPGeneral   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [5, 1, 1]>,
+  InstrItinData<IIC_FPCompare   , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [8, 1, 1]>,
+  InstrItinData<IIC_FPDivD      , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [33, 1, 1]>,
+  InstrItinData<IIC_FPDivS      , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [27, 1, 1]>,
+  InstrItinData<IIC_FPSqrtD     , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [44, 1, 1]>,
+  InstrItinData<IIC_FPSqrtS     , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [32, 1, 1]>,
+  InstrItinData<IIC_FPFused     , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [5, 1, 1, 1]>,
+  InstrItinData<IIC_FPRes       , [InstrStage<1, [P8_DU1, P8_DU2,
+                                                  P8_DU3, P8_DU4], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [5, 1, 1]>,
+  InstrItinData<IIC_VecGeneral  , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_VecVSL      , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_VecVSR      , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [2, 1, 1]>,
+  InstrItinData<IIC_VecFP       , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [6, 1, 1]>,
+  InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [6, 1, 1]>,
+  InstrItinData<IIC_VecFPRound  , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [6, 1, 1]>,
+  InstrItinData<IIC_VecComplex  , [InstrStage<1, [P8_DU1], 0>,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [7, 1, 1]>,
+  InstrItinData<IIC_VecPerm     , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
+                                   InstrStage<1, [P8_VS2]>],
+                                  [3, 1, 1]>
+]>;
+
+//
===---------------------------------------------------------------------===//
+// P8 machine model for scheduling and other instruction cost heuristics.
+
+def P8Model : SchedMachineModel {
+  let IssueWidth = 6;  // 4 (non-branch) instructions are dispatched per cycle.
+                       // Note that the dispatch bundle size is 6 (including
+                       // branches), but the total internal issue bandwidth per
+                       // cycle (from all queues) is 8.
+
+  let MinLatency = 0;  // Out-of-order dispatch.
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  let Itineraries = P8Itineraries;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h
b/lib/Target/PowerPC/PPCSubtarget.h
index 8aafa99..a3a7480 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -56,6 +56,7 @@ namespace PPC {
     DIR_PWR6,
     DIR_PWR6X,
     DIR_PWR7,
+    DIR_PWR8,
     DIR_64
   };
 }
Eric Christopher
2014-Jun-24  22:13 UTC
[LLVMdev] [cfe-dev] [1/5 PATCH/RFC PPC64] add power8 keyword target to llvm
Two things: a) this didn't need to go to cfe-dev. b) the XXX Fixme: It seems like you might as well just say it's a P7 in the processor description rather than copying a known "not the same" model. The PPCSubtarget.h and PPC.td files look fine though. -eric On Tue, Jun 24, 2014 at 2:43 PM, Will Schmidt <will_schmidt at vnet.ibm.com> wrote:> Add pwr8/keyword, and initial P8 tablegen descriptor table. > > > > diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td > index bd58539..6badc2f 100644 > --- a/lib/Target/PowerPC/PPC.td > +++ b/lib/Target/PowerPC/PPC.td > @@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "" > def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; > def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; > def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; > +def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; > > def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", > "Enable 64-bit instructions">; > @@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model, > FeaturePOPCNTD, FeatureLDBRX, > Feature64Bit /*, Feature64BitRegs */, > DeprecatedMFTB, DeprecatedDST]>; > +def : ProcessorModel<"pwr8", P8Model, > + [DirectivePwr8, FeatureAltivec, > + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, > + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, > + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, > + FeatureFPRND, FeatureFPCVT, FeatureISEL, > + FeaturePOPCNTD, FeatureLDBRX, > + Feature64Bit /*, Feature64BitRegs */, > + DeprecatedMFTB, DeprecatedDST]>; > def : Processor<"ppc", G3Itineraries, [Directive32]>; > def : ProcessorModel<"ppc64", G5Model, > [Directive64, FeatureAltivec, > diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td > index 1221d41..a5cc4e7 100644 > --- a/lib/Target/PowerPC/PPCSchedule.td > +++ 
b/lib/Target/PowerPC/PPCSchedule.td > @@ -118,6 +118,7 @@ include "PPCScheduleG4.td" > include "PPCScheduleG4Plus.td" > include "PPCScheduleG5.td" > include "PPCScheduleP7.td" > +include "PPCScheduleP8.td" > include "PPCScheduleA2.td" > include "PPCScheduleE500mc.td" > include "PPCScheduleE5500.td" > diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td > new file mode 100644 > index 0000000..c4b918f > --- /dev/null > +++ b/lib/Target/PowerPC/PPCScheduleP8.td > @@ -0,0 +1,389 @@ > +//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. > +// > +//===----------------------------------------------------------------------===// > +// > +// This file defines the itinerary class data for the POWER7 processor. > +// > +//===----------------------------------------------------------------------===// > + > +// XXX FIXME. > +// this is a blind copy of P7 Schedule and s/P7/P8/g . Details within will need to be updated with the P8 specifics. > + > + > +// Primary reference: > +// IBM POWER7 multicore server processor > +// B. Sinharoy, et al. > +// IBM J. Res. & Dev. (55) 3. May/June 2011. > + > +// Scheduling for the P8 involves tracking two types of resources: > +// 1. The dispatch bundle slots > +// 2. The functional unit resources > + > +// Dispatch units: > +def P8_DU1 : FuncUnit; > +def P8_DU2 : FuncUnit; > +def P8_DU3 : FuncUnit; > +def P8_DU4 : FuncUnit; > +def P8_DU5 : FuncUnit; > +def P8_DU6 : FuncUnit; > + > +def P8_LS1 : FuncUnit; // Load/Store pipeline 1 > +def P8_LS2 : FuncUnit; // Load/Store pipeline 2 > + > +def P8_FX1 : FuncUnit; // FX pipeline 1 > +def P8_FX2 : FuncUnit; // FX pipeline 2 > + > +// VS pipeline 1 (vector integer ops. always here) > +def P8_VS1 : FuncUnit; // VS pipeline 1 > +// VS pipeline 2 (128-bit stores and perms. 
here) > +def P8_VS2 : FuncUnit; // VS pipeline 2 > + > +def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs) > +def P8_BRU : FuncUnit; // BR unit > + > +// Notes: > +// Each LSU pipeline can also execute FX add and logical instructions. > +// Each LSU pipeline can complete a load or store in one cycle. > +// > +// Each store is broken into two parts, AGEN goes to the LSU while a > +// "data steering" op. goes to the FXU or VSU. > +// > +// FX loads have a two cycle load-to-use latency (so one "bubble" cycle). > +// VSU loads have a three cycle load-to-use latency (so two "bubble" cycle). > +// > +// Frequent FX ops. take only one cycle and results can be used again in the > +// next cycle (there is a self-bypass). Getting results from the other FX > +// pipeline takes an additional cycle. > +// > +// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles > +// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops. > +// Dispatch of an instruction to VS1 that uses four single prec. inputs > +// (either to a float or XC op). prevents dispatch in that cycle to VS2 of any > +// floating point instruction. > +// > +// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles > +// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline > +// (unlike on the POWER6). > +// > +// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP > +// share the same write-back, and have a 5-cycle latency difference, so the > +// IFU/IDU will not dispatch an XS instructon 5 cycles after a vector FP > +// op. has been dispatched to VS1. > +// > +// Three cycles after an L1 cache hit, a dependent VSU instruction can issue. > +// > +// Instruction dispatch groups have (at most) four non-branch instructions, and > +// two branches. Unlike on the POWER4/5, a branch does not automatically > +// end the dispatch group, but a second branch must be the last in the group. 
> + > +def P8Itineraries : ProcessorItineraries< > + [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, > + P8_LS1, P8_LS2, P8_FX1, P8_FX2, P8_VS1, P8_VS2, P8_CRU, P8_BRU], [], [ > + InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2, > + P8_LS1, P8_LS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + // FIXME: Add record-form itinerary data. > + InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<36, [P8_FX1, P8_FX2]>], > + [36, 1, 1]>, > + InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<68, [P8_FX1, P8_FX2]>], > + [68, 1, 1]>, > + InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 1, 1]>, > + InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 1, 1]>, > + InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 1, 1]>, > + InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1]>, > + 
InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1]>, > + InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU5, P8_DU6], 0>, > + InstrStage<1, [P8_BRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_CRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU5, P8_DU6], 0>, > + InstrStage<1, [P8_BRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU5, P8_DU6], 0>, > + InstrStage<1, [P8_BRU]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [2, 1, 1]>, > + InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 2, 1, 1]>, > + InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [2, 1, 1]>, > + InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 2, 1, 1]>, > + InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + 
InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 3, 1, 1]>, > + InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 4, 1, 1]>, > + InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [4, 4, 1, 1]>, > + InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [3, 1, 1]>, > + InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, 
P8_LS2]>], > + [2, 1, 1]>, > + InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 1, 1, 1]>, > + InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [2, 1, 1, 1]>, > + InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_FX1, P8_FX2], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [2, 1, 1, 1]>, > + InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2], 0>, > + InstrStage<1, [P8_VS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_LS1, P8_LS2]>], > + [1, 1, 1]>, > + InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>, > + 
InstrStage<1, [P8_DU2], 0>, > + InstrStage<1, [P8_DU3], 0>, > + InstrStage<1, [P8_DU4], 0>, > + InstrStage<1, [P8_CRU]>, > + InstrStage<1, [P8_FX1, P8_FX2]>], > + [3, 1]>, // mtcr > + InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_CRU]>], > + [6, 1]>, > + InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_CRU]>], > + [3, 1]>, > + InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_FX1]>], > + [4, 1]>, // mtctr > + InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [5, 1, 1]>, > + InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [8, 1, 1]>, > + InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [33, 1, 1]>, > + InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [27, 1, 1]>, > + InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [44, 1, 1]>, > + InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [32, 1, 1]>, > + InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [5, 1, 1, 1]>, > + InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, > + P8_DU3, P8_DU4], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [5, 1, 1]>, > + InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [2, 1, 1]>, > + InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [2, 1, 1]>, > + InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [2, 1, 1]>, > + 
InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [6, 1, 1]>, > + InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [6, 1, 1]>, > + InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1, P8_VS2]>], > + [6, 1, 1]>, > + InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>, > + InstrStage<1, [P8_VS1]>], > + [7, 1, 1]>, > + InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>, > + InstrStage<1, [P8_VS2]>], > + [3, 1, 1]> > +]>; > + > +// ===---------------------------------------------------------------------===// > +// P8 machine model for scheduling and other instruction cost heuristics. > + > +def P8Model : SchedMachineModel { > + let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle. > + // Note that the dispatch bundle size is 6 (including > + // branches), but the total internal issue bandwidth per > + // cycle (from all queues) is 8. > + > + let MinLatency = 0; // Out-of-order dispatch. > + let LoadLatency = 3; // Optimistic load latency assuming bypass. > + // This is overriden by OperandCycles if the > + // Itineraries are queried instead. > + let MispredictPenalty = 16; > + > + let Itineraries = P8Itineraries; > +} > + > diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h > index 8aafa99..a3a7480 100644 > --- a/lib/Target/PowerPC/PPCSubtarget.h > +++ b/lib/Target/PowerPC/PPCSubtarget.h > @@ -56,6 +56,7 @@ namespace PPC { > DIR_PWR6, > DIR_PWR6X, > DIR_PWR7, > + DIR_PWR8, > DIR_64 > }; > } > > > _______________________________________________ > cfe-dev mailing list > cfe-dev at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev