MWCC/compiler_and_linker/BackEnd/PowerPC/Scheduler/MachineSimulationAltiVec.c

753 lines
31 KiB
C

#include "compiler/Scheduler.h"
#include "compiler/CError.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://www.nxp.com/docs/en/reference-manual/MPC7450UM.pdf
/*
 * One enumerator per pipeline stage tracked by the simulator.  The stages of
 * a multi-stage unit are numbered consecutively (advance_clock() steps from
 * one stage to the next by adding 1).  NumStages is the stage count and is
 * used as the size of pipeline_altivec[].
 */
typedef enum Stage {
    /* Branch Prediction Unit */
    BPU = 0,

    /* Multiple-Cycle Integer Unit */
    IU2_1,
    IU2_2,
    IU2_3,

    /* Single-Cycle Integer Units */
    IU1a,
    IU1b,
    IU1c,

    /* Load/Store Unit */
    LSU_1,
    LSU_2,
    LSU_3,
    LSU_4,

    /* Floating-Point Unit */
    FPU_1,
    FPU_2,
    FPU_3,
    FPU_4,

    /* Vector Simple Integer Unit */
    VIU1,

    /* Vector Permute Unit */
    VPU_1,
    VPU_2,

    /* Vector Complex Integer Unit */
    VIU2_1,
    VIU2_2,
    VIU2_3,
    VIU2_4,

    /* Vector Floating-Point Unit */
    VFPU_1,
    VFPU_2,
    VFPU_3,
    VFPU_4,

    /* number of stages; not a stage itself */
    NumStages
} Stage;
// Simulated pipeline state: one slot per Stage, indexed by the Stage value.
static struct {
// the instruction currently in this pipeline stage (NULL when the stage is empty)
PCode *instr;
// how many cycles are left for this instruction to finish in this stage;
// decremented by advance_clock() while non-zero
int remaining;
} pipeline_altivec[NumStages];
/*
 * Indices into fetchqueues[].  The comments describe how this file uses each
 * counter; the hardware structure each one models is not stated in the code.
 */
enum {
    /* supply that advance_clock() drains into Queue1 and tops up by 4 */
    Queue0 = 0,
    /* instructions available to issue; decremented by issue() */
    Queue1,
    /* availability counter consumed by FPU_1 issues (capped at 2) */
    Queue2,
    /* availability counter consumed by other non-branch issues (capped at 6) */
    Queue3,
    /* availability counter consumed by vector issues (capped at 4) */
    Queue4,
    /* FPU_1 issues since the last clock advance (can_issue requires < 1) */
    Queue5,
    /* other non-branch issues since the last clock advance (requires < 3) */
    Queue6,
    /* vector issues since the last clock advance (requires < 2) */
    Queue7,
    /* number of counters; not a counter itself */
    NumQueues
};
// Occupancy / per-cycle issue counters, indexed by Queue0..Queue7.
// Reset by initialize(), consulted by can_issue(), updated by issue() and advance_clock().
static int fetchqueues[NumQueues];
/* Number of slots in the completionbuffers.entries[] ring. */
enum { MaxEntries = 16 };
// Completion queue: a ring of MaxEntries slots. assign_completion_buffer() fills
// the slot at nextFreeSlot, complete_instruction() marks a slot completed, and
// retire_instruction() releases the slot at nextToRetire.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the instruction occupying this slot (NULL when the slot is empty)
PCode *instr;
// 1 once complete_instruction() has seen this instruction, otherwise 0
int completed;
} entries[MaxEntries];
} completionbuffers;
// Per-opcode timing table, indexed by instr->op throughout this file.
// The initialiser is written flat (no inner braces); every row supplies nine
// values in field order:
//   index, stage, latency, cycles[0], cycles[1], cycles[2], cycles[3], serializes, unused
// The trailing comment on each row names the opcode the row belongs to.
static struct {
// row number; matches the row's position in the table (-1 on the final row)
short index;
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[4];
// does this instruction serialise?
char serializes;
// always 0 in this table; not read anywhere in this file
char unused;
} instruction_timing[] = {
0, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_B
1, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BL
2, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BC
3, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCLR
4, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCCTR
5, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BT
6, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTLR
7, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTCTR
8, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BF
9, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFLR
10, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFCTR
11, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZ
12, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZT
13, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZF
14, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZ
15, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZT
16, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZF
17, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLR
18, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTR
19, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTRL
20, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLRL
21, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZ
22, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZU
23, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZX
24, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZUX
25, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZ
26, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZU
27, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZX
28, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZUX
29, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHA
30, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAU
31, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAX
32, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAUX
33, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHBRX
34, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZ
35, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZU
36, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZX
37, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZUX
38, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWBRX
39, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_LMW
40, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STB
41, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBU
42, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBX
43, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBUX
44, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STH
45, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHU
46, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHX
47, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHUX
48, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHBRX
49, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STW
50, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWU
51, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWX
52, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWUX
53, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWBRX
54, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STMW
55, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBF
56, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBST
57, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBT
58, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBTST
59, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBZ
60, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADD
61, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDC
62, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDE
63, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDI
64, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIC
65, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDICR
66, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIS
67, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDME
68, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDZE
69, IU2_1, 23, 23, 0, 0, 0, 0, 0, // PC_DIVW
70, IU2_1, 23, 23, 0, 0, 0, 0, 0, // PC_DIVWU
71, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULHW
72, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULHWU
73, IU2_1, 3, 1, 1, 1, 0, 0, 0, // PC_MULLI
74, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULLW
75, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NEG
76, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBF
77, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFC
78, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFE
79, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFIC
80, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFME
81, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFZE
82, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPI
83, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMP
84, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPLI
85, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPL
86, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDI
87, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDIS
88, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORI
89, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORIS
90, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XORI
91, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XORIS
92, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_AND
93, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_OR
94, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XOR
95, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NAND
96, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOR
97, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EQV
98, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDC
99, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORC
100, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSB
101, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSH
102, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CNTLZW
103, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWINM
104, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWNM
105, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWIMI
106, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SLW
107, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SRW
108, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_SRAWI
109, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_SRAW
110, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRAND
111, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRANDC
112, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CREQV
113, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRNAND
114, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRNOR
115, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CROR
116, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRORC
117, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRXOR
118, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MCRF
119, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTXER
120, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTCTR
121, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTLR
122, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MTCRF
123, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTMSR
124, IU2_1, 3, 3, 0, 0, 0, 1, 0, // PC_MTSPR
125, IU2_1, 3, 2, 1, 0, 0, 0, 0, // PC_MFMSR
126, IU2_1, 3, 3, 0, 0, 0, 1, 0, // PC_MFSPR
127, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFXER
128, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFCTR
129, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFLR
130, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MFCR
131, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MFFS
132, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSF
133, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_EIEIO
134, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_ISYNC
135, LSU_1, 35, 35, 0, 0, 0, 1, 0, // PC_SYNC
136, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_RFI
137, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_LI
138, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_LIS
139, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MR
140, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOP
141, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOT
142, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFS
143, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSU
144, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSX
145, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSUX
146, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFD
147, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDU
148, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDX
149, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDUX
150, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFS
151, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSU
152, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSX
153, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSUX
154, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFD
155, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDU
156, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDX
157, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDUX
158, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMR
159, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FABS
160, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNEG
161, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNABS
162, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FADD
163, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FADDS
164, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSUB
165, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSUBS
166, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMUL
167, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMULS
168, FPU_1, 35, 35, 0, 0, 0, 0, 0, // PC_FDIV
169, FPU_1, 21, 21, 0, 0, 0, 0, 0, // PC_FDIVS
170, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMADD
171, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMADDS
172, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMSUB
173, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMSUBS
174, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMADD
175, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMADDS
176, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMSUB
177, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMSUBS
178, FPU_1, 14, 14, 0, 0, 0, 0, 0, // PC_FRES
179, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FRSQRTE
180, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSEL
181, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FRSP
182, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCTIW
183, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCTIWZ
184, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCMPU
185, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCMPO
186, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_LWARX
187, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LSWI
188, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_LSWX
189, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STFIWX
190, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STSWI
191, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STSWX
192, LSU_1, 3, 1, 1, 1, 0, 1, 0, // PC_STWCX
193, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ECIWX
194, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ECOWX
195, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBI
196, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ICBI
197, IU2_1, 5, 5, 0, 0, 0, 1, 0, // PC_MCRFS
198, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MCRXR
199, IU2_1, 5, 5, 0, 0, 0, 0, 0, // PC_MFTB
200, IU2_1, 4, 1, 3, 0, 0, 0, 0, // PC_MFSR
201, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTSR
202, IU2_1, 4, 1, 3, 0, 0, 0, 0, // PC_MFSRIN
203, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTSRIN
204, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSB0
205, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSB1
206, FPU_1, 5, 5, 0, 0, 0, 0, 0, // PC_MTFSFI
207, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_SC
208, FPU_1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRT
209, FPU_1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRTS
210, LSU_1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIA
211, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_TLBIE
212, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_TLBLD
213, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_TLBLI
214, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_TLBSYNC
215, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_TW
216, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_TRAP
217, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_TWI
218, IU1a, 1, 1, 0, 0, 0, 1, 0, // PC_OPWORD
219, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MFROM
220, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_DSA
221, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ESA
222, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_DCCCI
223, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_DCREAD
224, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICBT
225, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICCCI
226, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICREAD
227, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_RFCI
228, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBRE
229, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBSX
230, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBWE
231, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEE
232, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEEI
233, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_MFDCR
234, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_MTDCR
235, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBA
236, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSS
237, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSSALL
238, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DST
239, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTT
240, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTST
241, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTSTT
242, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEBX
243, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEHX
244, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEWX
245, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVSL
246, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVSR
247, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVX
248, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVXL
249, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEBX
250, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEHX
251, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEWX
252, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVX
253, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVXL
254, VFPU_1, 2, 2, 0, 0, 0, 1, 0, // PC_MFVSCR
255, VFPU_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTVSCR
256, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDCUW
257, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VADDFP
258, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSBS
259, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSHS
260, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSWS
261, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBM
262, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBS
263, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHM
264, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHS
265, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWM
266, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWS
267, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAND
268, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VANDC
269, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSB
270, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSH
271, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSW
272, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUB
273, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUH
274, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUW
275, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFSX
276, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFUX
277, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPBFP
278, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPEQFP
279, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUB
280, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUH
281, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUW
282, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPGEFP
283, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPGTFP
284, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSB
285, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSH
286, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSW
287, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUB
288, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUH
289, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUW
290, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTSXS
291, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTUXS
292, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VEXPTEFP
293, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VLOGEFP
294, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMAXFP
295, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSB
296, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSH
297, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSW
298, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUB
299, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUH
300, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUW
301, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMINFP
302, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSB
303, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSH
304, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSW
305, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUB
306, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUH
307, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUW
308, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHB
309, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHH
310, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHW
311, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLB
312, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLH
313, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLW
314, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULESB
315, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULESH
316, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULEUB
317, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULEUH
318, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOSB
319, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOSH
320, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOUB
321, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOUH
322, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VNOR
323, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VOR
324, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKPX
325, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSHSS
326, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSHUS
327, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSWSS
328, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSWUS
329, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUHUM
330, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUHUS
331, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUWUM
332, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUWUS
333, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VREFP
334, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIM
335, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIN
336, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIP
337, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIZ
338, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLB
339, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLH
340, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLW
341, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRSQRTEFP
342, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSL
343, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLB
344, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLH
345, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSLO
346, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLW
347, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTB
348, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTH
349, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTW
350, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISB
351, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISH
352, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISW
353, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSR
354, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAB
355, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAH
356, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAW
357, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRB
358, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRH
359, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSRO
360, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRW
361, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBCUW
362, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUBFP
363, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSBS
364, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSHS
365, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSWS
366, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBM
367, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBS
368, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHM
369, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHS
370, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWM
371, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWS
372, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUMSWS
373, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM2SWS
374, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4SBS
375, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4SHS
376, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4UBS
377, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHPX
378, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHSB
379, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHSH
380, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLPX
381, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLSB
382, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLSH
383, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VXOR
384, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMADDFP
385, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMHADDSHS
386, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMHRADDSHS
387, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMLADDUHM
388, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMMBM
389, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMSHM
390, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMSHS
391, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUBM
392, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUHM
393, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUHS
394, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VNMSUBFP
395, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPERM
396, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSEL
397, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSLDOI
398, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMR
399, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRP
// final row: index -1 marks the end of the table
-1, IU2_1, 1, 1, 0, 0, 0, 1, 0
};
// Number of initialised rows in pipeline_units[]; advance_clock() iterates over exactly this many.
enum { NumPipelineUnits = 6 };
// First and last stage of each execution unit that has more than one stage.
// advance_clock() walks every stage from start up to (but not including) end and
// moves a finished instruction into the following stage when that stage is empty.
// The array is declared with 8 elements but only the first NumPipelineUnits are initialised.
static struct {
Stage start, end;
} pipeline_units[8] = {
IU2_1, IU2_3,
LSU_1, LSU_4,
FPU_1, FPU_4,
VPU_1, VPU_2,
VIU2_1, VIU2_4,
VFPU_1, VFPU_4
};
// Number of initialised entries in finalstages[]; advance_clock() iterates over exactly this many.
enum { NumFinalStages = 11 };
// The last stage of every unit (single-stage units list their only stage).
// advance_clock() completes an instruction found in one of these stages once its
// remaining cycle count reaches 0.
// The array is declared with 16 elements but only the first NumFinalStages are initialised.
static Stage finalstages[16] = {
BPU, IU2_3, IU1a, IU1b,
IU1c, LSU_4, FPU_4, VIU1,
VPU_2, VIU2_4, VFPU_4
};
// forward decl
static void complete_instruction(int stage);
/*
 * Move the instruction held in oldStage into newStage.
 *
 * firstStage is the first stage of the unit both stages belong to; the offset
 * (newStage - firstStage) selects which per-stage cycle count from the timing
 * table becomes the instruction's remaining time in newStage.  oldStage is
 * emptied.  When the selected cycle count is zero the instruction is handed
 * to complete_instruction() immediately.
 */
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline_altivec[oldStage].instr;
    int stageCycles = instruction_timing[moving->op].cycles[newStage - firstStage];

    /* occupy the destination stage */
    pipeline_altivec[newStage].instr = moving;
    pipeline_altivec[newStage].remaining = stageCycles;

    /* vacate the source stage */
    pipeline_altivec[oldStage].instr = NULL;
    pipeline_altivec[oldStage].remaining = 0;

    if (stageCycles != 0)
        return;

    /* nothing left to do in the new stage */
    complete_instruction(newStage);
}
/*
 * Claim the next free completion-queue slot for instr.
 *
 * The slot is marked not-completed, the free-slot cursor moves on (wrapping
 * at MaxEntries) and the used/free counters are adjusted.  No check is made
 * here that a free slot exists.
 */
static void assign_completion_buffer(PCode *instr) {
    unsigned int slot = completionbuffers.nextFreeSlot;

    completionbuffers.entries[slot].instr = instr;
    completionbuffers.entries[slot].completed = 0;
    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;

    completionbuffers.free--;
    completionbuffers.used++;
}
/*
 * Mark the instruction currently in `stage` as completed and empty the stage.
 *
 * The completion queue is searched from slot 0 for the entry holding the same
 * instruction pointer; the first match gets its `completed` flag set.  If no
 * entry matches, no flag is written: the previous code indexed
 * entries[MaxEntries] in that case, which is one element past the array.
 * The stage is emptied either way.
 */
static void complete_instruction(int stage) {
    PCode *instr = pipeline_altivec[stage].instr;
    int buf;

    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr) {
            completionbuffers.entries[buf].completed = 1;
            break;
        }
    }

    pipeline_altivec[stage].instr = NULL;
}
/*
 * Release the completion-queue slot at the retire cursor.
 *
 * The slot is cleared, the cursor moves on (wrapping at MaxEntries) and the
 * used/free counters are adjusted.  The caller decides whether the entry is
 * ready to retire; nothing is checked here.
 */
static void retire_instruction(void) {
    unsigned int slot = completionbuffers.nextToRetire;

    completionbuffers.entries[slot].instr = NULL;
    completionbuffers.entries[slot].completed = 0;
    completionbuffers.nextToRetire = (slot + 1) % MaxEntries;

    completionbuffers.free++;
    completionbuffers.used--;
}
/*
 * Return the latency, in cycles, of instr.
 *
 * Starts from the timing-table latency for the opcode, adds one cycle when
 * the record bit flag is set on the instruction, and for PC_LMW / PC_STMW
 * adds (argCount - 2) more.
 */
static int latency(PCode *instr) {
    int total = instruction_timing[instr->op].latency;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        total++;

    if (instr->op != PC_LMW && instr->op != PC_STMW)
        return total;

    /* load/store multiple: cost grows with the operand count */
    total += instr->argCount - 2;
    return total;
}
/*
 * Reset the simulator to its start state: Queue0 holds 1 and every other
 * queue counter is 0, every pipeline stage is empty, and the completion queue
 * is empty with both cursors at slot 0.  The per-stage `remaining` counters
 * are not touched.
 */
static void initialize(void) {
    int q;
    int s;
    int slot;

    /* queue counters: only Queue0 starts non-zero */
    for (q = NumQueues - 1; q > 0; q--)
        fetchqueues[q] = 0;
    fetchqueues[Queue0] = 1;

    /* empty every pipeline stage */
    for (s = 0; s < NumStages; s++)
        pipeline_altivec[s].instr = NULL;

    /* empty the completion queue */
    for (slot = 0; slot < MaxEntries; slot++) {
        completionbuffers.entries[slot].completed = 0;
        completionbuffers.entries[slot].instr = NULL;
    }
    completionbuffers.nextFreeSlot = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.used = 0;
    completionbuffers.free = MaxEntries;
}
/*
 * Return 1 if instr could be issued in the current simulated cycle, else 0.
 *
 * All of the following must hold:
 *   - the completion queue has a free slot;
 *   - the instruction's first stage is empty (for IU1a-class instructions,
 *     at least one of IU1a / IU1b is empty);
 *   - Queue1 is positive;
 *   - the counters for the instruction's class allow it:
 *       FPU_1            : Queue2 >= 1 and Queue5 < 1
 *       VIU1 .. VFPU_1   : Queue4 >= 1 and Queue7 < 2
 *       other non-branch : Queue3 >= 1 and Queue6 < 3
 *     (BPU instructions have no class check).
 *
 * NOTE(review): issue() can also place an IU1a-class instruction in IU1c, but
 * IU1c is not considered here, so IU1c is never actually chosen. Confirm
 * whether that is intended before changing either function.
 */
static int can_issue(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;

    if (completionbuffers.free == 0)
        return 0;

    if (unit == IU1a) {
        if (pipeline_altivec[IU1a].instr && pipeline_altivec[IU1b].instr)
            return 0;
    } else if (pipeline_altivec[unit].instr) {
        return 0;
    }

    if (fetchqueues[Queue1] <= 0)
        return 0;

    if (unit == FPU_1)
        return fetchqueues[Queue2] >= 1 && fetchqueues[Queue5] < 1;

    if (unit >= VIU1 && unit <= VFPU_1)
        return fetchqueues[Queue4] >= 1 && fetchqueues[Queue7] < 2;

    if (unit != BPU)
        return fetchqueues[Queue3] >= 1 && fetchqueues[Queue6] < 3;

    return 1;
}
/*
 * Issue instr into the simulated machine.
 *
 * Claims a completion-queue slot, takes one entry from Queue1 (asserting it
 * does not go negative), moves one count from the class availability counter
 * (Queue2/3/4) to the class per-cycle counter (Queue5/6/7), clamps
 * Queue2..Queue4 so none exceeds Queue1, and finally places the instruction
 * in its first stage with cycles[0] remaining.  IU1a-class instructions take
 * the first empty stage among IU1a, IU1b, IU1c, falling back to IU1a.
 */
static void issue(PCode *instr) {
    int stage = instruction_timing[instr->op].stage;
    int cycles = instruction_timing[instr->op].cycles[0];

    assign_completion_buffer(instr);
    CError_ASSERT(879, --fetchqueues[Queue1] >= 0);

    /* per-class bookkeeping */
    if (stage == FPU_1) {
        fetchqueues[Queue2]--;
        fetchqueues[Queue5]++;
    } else if (stage >= VIU1 && stage <= VFPU_1) {
        fetchqueues[Queue4]--;
        fetchqueues[Queue7]++;
    } else if (stage != BPU) {
        fetchqueues[Queue3]--;
        fetchqueues[Queue6]++;
    }

    /* no class counter may exceed what is left in Queue1 */
    if (fetchqueues[Queue1] < fetchqueues[Queue2])
        fetchqueues[Queue2] = fetchqueues[Queue1];
    if (fetchqueues[Queue1] < fetchqueues[Queue3])
        fetchqueues[Queue3] = fetchqueues[Queue1];
    if (fetchqueues[Queue1] < fetchqueues[Queue4])
        fetchqueues[Queue4] = fetchqueues[Queue1];

    /* pick a single-cycle integer unit; stays IU1a if all three are busy */
    if (stage == IU1a && pipeline_altivec[IU1a].instr) {
        if (!pipeline_altivec[IU1b].instr)
            stage = IU1b;
        else if (!pipeline_altivec[IU1c].instr)
            stage = IU1c;
    }

    pipeline_altivec[stage].instr = instr;
    pipeline_altivec[stage].remaining = cycles;
}
/*
 * Advance the simulation by one clock cycle.  The phases run in this order:
 *   1. decrement the remaining-cycle counter of every occupied stage;
 *   2. retire up to three consecutive completed entries from the completion
 *      queue, starting at the retire cursor;
 *   3. complete instructions that sit in a final stage with 0 cycles left;
 *   4. inside each multi-stage unit, move finished instructions one stage
 *      forward when the next stage is empty;
 *   5. reset the per-cycle issue counters (Queue5..Queue7), transfer up to
 *      three entries from Queue0 to Queue1 (bounded by the spare room in
 *      Queue2..Queue4, the free completion slots and Queue0 itself),
 *      recompute Queue2..Queue4, and add 4 to Queue0 when it holds 8 or fewer.
 */
static void advance_clock(void) {
    int num;
    int i;
    unsigned int unit;

    /* phase 1: tick every busy stage */
    for (i = 0; i < NumStages; i++) {
        if (!pipeline_altivec[i].instr)
            continue;
        if (pipeline_altivec[i].remaining)
            --pipeline_altivec[i].remaining;
    }

    /* phase 2: in-order retirement, at most three per cycle */
    for (i = 0; i < 3; i++) {
        if (completionbuffers.used == 0 ||
            completionbuffers.entries[completionbuffers.nextToRetire].completed == 0)
            break;
        retire_instruction();
    }

    /* phase 3: instructions leaving the last stage of a unit */
    for (unit = 0; unit < NumFinalStages; unit++) {
        Stage last = finalstages[unit];

        if (pipeline_altivec[last].instr && pipeline_altivec[last].remaining == 0)
            complete_instruction(last);
    }

    /* phase 4: shift instructions forward inside multi-stage units */
    for (unit = 0; unit < NumPipelineUnits; unit++) {
        Stage first = pipeline_units[unit].start;
        Stage current;

        for (current = first; current < pipeline_units[unit].end; current++) {
            if (!pipeline_altivec[current].instr)
                continue;
            if (pipeline_altivec[current].remaining != 0)
                continue;
            if (pipeline_altivec[current + 1].instr)
                continue;
            advance(first, current, current + 1);
        }
    }

    /* phase 5: queue bookkeeping */
    fetchqueues[Queue7] = 0;
    fetchqueues[Queue6] = 0;
    fetchqueues[Queue5] = 0;

#define CHEAP_MIN(a, b) ( ((a) < (b)) ? (a) : (b) )

    /* spare room across the three class counters (limits 2, 6 and 4) */
    num = (2 - fetchqueues[Queue2]) + (6 - fetchqueues[Queue3]) + (4 - fetchqueues[Queue4]);
    if (num > 3)
        num = 3;
    if (completionbuffers.free < num)
        num = completionbuffers.free;
    if (fetchqueues[Queue0] < num)
        num = fetchqueues[Queue0];

    fetchqueues[Queue0] -= num;
    fetchqueues[Queue1] += num;

    fetchqueues[Queue2] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(2, fetchqueues[Queue2] + num));
    fetchqueues[Queue3] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(6, fetchqueues[Queue3] + num));
    fetchqueues[Queue4] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(4, fetchqueues[Queue4] + num));

    CError_ASSERT(991, fetchqueues[Queue1] <= (fetchqueues[Queue2] + fetchqueues[Queue3] + fetchqueues[Queue4]));

    /* top up the supply */
    if (fetchqueues[Queue0] <= 8)
        fetchqueues[Queue0] += 4;
}
/* Return the timing table's `serializes` flag for instr's opcode. */
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
/* Return 1 when instr's opcode starts in the vector permute unit (VPU_1), else 0. */
static int uses_vpermute_unit_altivec(PCode *instr) {
    if (instruction_timing[instr->op].stage == VPU_1)
        return 1;
    return 0;
}
// Machine description exported for this processor model. The function pointers
// are the simulation callbacks defined above in this file.
// NOTE(review): the meaning of the three leading integers (6, 1, 4) comes from the
// MachineInfo declaration, which is not visible in this file; confirm against it
// before changing them.
MachineInfo machine7450 = {
6,
1,
4,
&latency,
&initialize,
&can_issue,
&issue,
&advance_clock,
&serializes,
&uses_vpermute_unit_altivec
};