move lots of source files around to match their actual placement in the original tree

This commit is contained in:
Ash Wolf
2023-01-26 11:30:47 +00:00
parent fc0c4c0df7
commit 094b96ca1d
120 changed files with 400 additions and 392 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,857 @@
#include "compiler/Exceptions.h"
#include "compiler/CError.h"
#include "compiler/CException.h"
#include "compiler/CInit.h"
#include "compiler/CFunc.h"
#include "compiler/CParser.h"
#include "compiler/CompilerTools.h"
#include "compiler/ObjGenMachO.h"
#include "compiler/PCode.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/RegisterInfo.h"
#include "compiler/StackFrame.h"
#include "compiler/objects.h"
// Head of the doubly-linked list of per-instruction action records
// appended by recordexceptionactions().
static PCAction *pc_actions;
// Tail of the pc_actions list; new records are linked after it.
static PCAction *last_pc_action;
// One singly-linked list (via dagListNext) of EANodes per exception action
// type; makeEAnode() searches and extends these lists.
EANode *DAG[EAT_NACTIONS];
// Growable byte buffer in which dumpexceptiontables() assembles the
// exception table for one function.
static GList exceptmodule;
// Head of the list of (object, offset) relocations recorded by
// addrelocation() for the table in exceptmodule.
static OLinkList *except_refs;
// Tail of the except_refs list.
static OLinkList *last_except_ref;
// Returns the EANode that represents `ea` (and, recursively, its chain of
// predecessors), creating it if necessary.
//
// Lookup happens in two passes over DAG[ea->type]:
//   1. a node that already wraps this exact ExceptionAction pointer;
//   2. a node with the same predecessor node whose action compares equal
//      according to CExcept_ActionCompare().
// Only if both fail is a new node allocated and pushed on the front of the
// per-type list. A predecessor's `count` is incremented each time a new node
// is hung off it.
static EANode *makeEAnode(ExceptionAction *ea) {
    EANode *parent;
    EANode *cur;

    // Pass 1: identity match on the action pointer.
    cur = DAG[ea->type];
    while (cur) {
        if (cur->action == ea)
            return cur;
        cur = cur->dagListNext;
    }

    // Resolve the predecessor first; this may itself add nodes to the DAG.
    parent = ea->prev ? makeEAnode(ea->prev) : NULL;

    // Pass 2: structural match (same predecessor, equivalent action).
    cur = DAG[ea->type];
    while (cur) {
        if (cur->prev == parent && CExcept_ActionCompare(cur->action, ea))
            return cur;
        cur = cur->dagListNext;
    }

    // No match: create a fresh node at the head of this type's list.
    cur = lalloc(sizeof(EANode));
    cur->action = ea;
    cur->prev = parent;
    cur->xE = 0;
    cur->count = 0;
    cur->dagListNext = DAG[ea->type];
    DAG[ea->type] = cur;

    if (parent)
        parent->count++;

    return cur;
}
// Appends a relocation entry to the except_refs list, recording that `obj`
// is referenced at byte position `offset` of the table being built.
static void addrelocation(Object *obj, SInt32 offset) {
    OLinkList *entry = lalloc(sizeof(OLinkList));

    entry->obj = obj;
    entry->offset = offset;
    entry->somevalue = 0;
    entry->next = NULL;

    // Link at the tail so entries stay in emission order.
    if (!except_refs)
        except_refs = entry;
    else
        last_except_ref->next = entry;
    last_except_ref = entry;
}
// Record layouts appended verbatim to the exception table by
// allocateactioninfo(). Each struct name spells out its field widths in order:
// A = UInt8, B = UInt16, C = UInt32 (e.g. AABC = two bytes, one 16-bit word,
// one 32-bit word).
//
// NOTE(review): the relocation offsets used by allocateactioninfo() (for
// example +6 for AACC.d, +14 for AACCCC.f) assume that 32-bit fields are only
// 2-byte aligned. That packing is requested by the pragma below, which is
// only active when __MWERKS__ is defined; with another compiler the default
// padding would presumably move those fields — confirm before building
// elsewhere.
#ifdef __MWERKS__
#pragma options align=mac68k
#endif
// u8, u8, u16, u32
typedef struct AABC {
UInt8 a;
UInt8 b;
UInt16 c;
UInt32 d;
} AABC;
// u8, u8, u32, u32
typedef struct AACC {
UInt8 a;
UInt8 b;
UInt32 c;
UInt32 d;
} AACC;
// u8, u8, u16, u16, u32
typedef struct AABBC {
UInt8 a;
UInt8 b;
UInt16 c;
UInt16 d;
UInt32 e;
} AABBC;
// u8, u8, u16, u32, u32
typedef struct AABCC {
UInt8 a;
UInt8 b;
UInt16 c;
UInt32 d;
UInt32 e;
} AABCC;
// u8, u8, u32, u32, u32
typedef struct AACCC {
UInt8 a;
UInt8 b;
UInt32 c;
UInt32 d;
UInt32 e;
} AACCC;
// u8, u8, u16, u16, u16, u32
typedef struct AABBBC {
UInt8 a;
UInt8 b;
UInt16 c;
UInt16 d;
UInt16 e;
UInt32 f;
} AABBBC;
// u8, u8, u16, u16, u32, u32
typedef struct AABBCC {
UInt8 a;
UInt8 b;
UInt16 c;
UInt16 d;
UInt32 e;
UInt32 f;
} AABBCC;
// u8, u8, u32, u32, u32, u32
typedef struct AACCCC {
UInt8 a;
UInt8 b;
UInt32 c;
UInt32 d;
UInt32 e;
UInt32 f;
} AACCCC;
// u8, u8, u16, u32, u32, u32, u32
typedef struct AABCCCC {
UInt8 a;
UInt8 b;
UInt16 c;
UInt32 d;
UInt32 e;
UInt32 f;
UInt32 g;
} AABCCCC;
// u8, u8, u32, u32, u32, u32, u32
typedef struct AACCCCC {
UInt8 a;
UInt8 b;
UInt32 c;
UInt32 d;
UInt32 e;
UInt32 f;
UInt32 g;
} AACCCCC;
// u8, u8, u16
typedef struct AAB {
UInt8 a;
UInt8 b;
UInt16 c;
} AAB;
// u8, u8, u32
typedef struct AAC {
UInt8 a;
UInt8 b;
UInt32 c;
} AAC;
// u8, u8
typedef struct AA {
UInt8 a;
UInt8 b;
} AA;
#ifdef __MWERKS__
#pragma options align=reset
#endif
// Serialises the chain of exception actions starting at `node` into
// `exceptmodule`, walking towards older actions through node->prev.
//
// For every node visited:
//   - node->xE is set to the byte position of its record in exceptmodule
//     (only if it was still 0);
//   - one packed record is appended; its first byte is the record type,
//     OR'd with 0x80 when the node has no predecessor;
//   - references to destructors / delete functions / type ids are left as 0
//     in the record and registered through addrelocation() at the byte
//     position of the corresponding field.
//
// Most record kinds exist in a short form (16-bit operands) and a long form
// (32-bit operands); the short form is used when every stack-relative operand
// passes local_is_16bit_offset() or is held in a register (OBJECT_REG() != 0).
//
// The walk stops at the first node that already has a position (xE != 0) and
// still has a predecessor; in that case a type-1 record holding that node's
// 16-bit position is appended so the new chain continues in the old one.
static void allocateactioninfo(EANode *node) {
    ExceptionAction *ea;
    SInt32 offset;
    UInt32 flag26;
    int reg;
    int reg2;

    while (node && (node->xE == 0 || node->prev == NULL)) {
        offset = exceptmodule.size;
        if (node->xE == 0)
            node->xE = offset;

        // 0x80 in the type byte marks a node without predecessor.
        flag26 = node->prev ? 0 : 0x80;
        ea = node->action;

        switch (ea->type) {
            case EAT_NOP:
                CError_FATAL(146);
                break;

            case EAT_DESTROYLOCAL: {
                if (local_is_16bit_offset(ea->data.destroy_local.local)) {
                    AABC e;
                    e.a = flag26 | 2;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord16(local_offset_16(ea->data.destroy_local.local));
                    e.d = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local.dtor, offset + 4);
                } else {
                    AACC e;
                    e.a = flag26 | 0x11;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord32(local_offset_32(ea->data.destroy_local.local));
                    e.d = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local.dtor, offset + 6);
                }
                break;
            }

            case EAT_DESTROYLOCALCOND: {
                reg = OBJECT_REG(ea->data.destroy_local_cond.cond);
                if (
                    (reg || local_is_16bit_offset(ea->data.destroy_local_cond.cond)) &&
                    local_is_16bit_offset(ea->data.destroy_local_cond.local)
                    )
                {
                    AABBC e;
                    e.a = flag26 | 3;
                    e.b = (reg != 0) << 7; // bit 7: condition is in a register
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_local_cond.cond));
                    e.d = CTool_EndianConvertWord16(local_offset_16(ea->data.destroy_local_cond.local));
                    e.e = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_cond.dtor, offset + 6);
                } else {
                    AACCC e;
                    e.a = flag26 | 0x12;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_local_cond.cond));
                    e.d = CTool_EndianConvertWord32(local_offset_32(ea->data.destroy_local_cond.local));
                    e.e = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_cond.dtor, offset + 10);
                }
                break;
            }

            case EAT_DESTROYLOCALOFFSET: {
                // Same record types as EAT_DESTROYLOCAL; the extra offset is
                // folded into the stored local offset.
                if (local_is_16bit_offset(ea->data.destroy_local_offset.local)) {
                    AABC e;
                    e.a = flag26 | 2;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord16(ea->data.destroy_local_offset.offset + local_offset_16(ea->data.destroy_local_offset.local));
                    e.d = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_offset.dtor, offset + 4);
                } else {
                    AACC e;
                    e.a = flag26 | 0x11;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord32(ea->data.destroy_local_offset.offset + local_offset_32(ea->data.destroy_local_offset.local));
                    e.d = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_offset.dtor, offset + 6);
                }
                break;
            }

            case EAT_DESTROYLOCALPOINTER: {
                reg = OBJECT_REG(ea->data.destroy_local_pointer.pointer);
                if (reg || local_is_16bit_offset(ea->data.destroy_local_pointer.pointer)) {
                    AABC e;
                    e.a = flag26 | 4;
                    e.b = (reg != 0) << 7; // bit 7: pointer is in a register
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_local_pointer.pointer));
                    e.d = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_pointer.dtor, offset + 4);
                } else {
                    AACC e;
                    e.a = flag26 | 0x13;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_local_pointer.pointer));
                    e.d = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_pointer.dtor, offset + 6);
                }
                break;
            }

            case EAT_DESTROYLOCALARRAY: {
                if (local_is_16bit_offset(ea->data.destroy_local_array.localarray)) {
                    AABBBC e;
                    e.a = flag26 | 5;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord16(local_offset_16(ea->data.destroy_local_array.localarray));
                    e.d = CTool_EndianConvertWord16(ea->data.destroy_local_array.elements);
                    e.e = CTool_EndianConvertWord16(ea->data.destroy_local_array.element_size);
                    e.f = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_array.dtor, offset + 8);
                } else {
                    AACCCC e;
                    e.a = flag26 | 0x14;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord32(local_offset_32(ea->data.destroy_local_array.localarray));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_local_array.elements);
                    e.e = CTool_EndianConvertWord32(ea->data.destroy_local_array.element_size);
                    e.f = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_local_array.dtor, offset + 14);
                }
                break;
            }

            case EAT_DESTROYMEMBER: {
                reg = OBJECT_REG(ea->data.destroy_member.objectptr);
                if (reg || local_is_16bit_offset(ea->data.destroy_member.objectptr)) {
                    AABCC e;
                    e.a = flag26 | 7;
                    e.b = (reg != 0) << 7; // bit 7: object pointer is in a register
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member.objectptr));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset);
                    e.e = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member.dtor, offset + 8);
                } else {
                    AACCC e;
                    e.a = flag26 | 0x16;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member.objectptr));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset);
                    e.e = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member.dtor, offset + 10);
                }
                break;
            }

            case EAT_DESTROYBASE: {
                // NOTE(review): reads the action through data.destroy_member
                // rather than data.destroy_base; presumably both union
                // members share a layout — confirm.
                reg = OBJECT_REG(ea->data.destroy_member.objectptr);
                if (reg || local_is_16bit_offset(ea->data.destroy_member.objectptr)) {
                    AABCC e;
                    e.a = flag26 | 6;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member.objectptr));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset);
                    e.e = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member.dtor, offset + 8);
                } else {
                    AACCC e;
                    e.a = flag26 | 0x15;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member.objectptr));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_member.offset);
                    e.e = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member.dtor, offset + 10);
                }
                break;
            }

            case EAT_DESTROYMEMBERCOND: {
                reg = OBJECT_REG(ea->data.destroy_member_cond.cond);
                reg2 = OBJECT_REG(ea->data.destroy_member_cond.objectptr);
                if (
                    (reg || local_is_16bit_offset(ea->data.destroy_member_cond.cond)) &&
                    (reg2 || local_is_16bit_offset(ea->data.destroy_member_cond.objectptr))
                    )
                {
                    AABBCC e;
                    e.a = flag26 | 8;
                    // bit 7: condition in register, bit 6: object pointer in register
                    e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6);
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member_cond.cond));
                    e.d = CTool_EndianConvertWord16(reg2 ? reg2 : local_offset_16(ea->data.destroy_member_cond.objectptr));
                    e.e = CTool_EndianConvertWord32(ea->data.destroy_member_cond.offset);
                    e.f = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member_cond.dtor, offset + 10);
                } else {
                    AACCCC e;
                    e.a = flag26 | 0x17;
                    e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6);
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member_cond.cond));
                    e.d = CTool_EndianConvertWord32(reg2 ? reg2 : local_offset_32(ea->data.destroy_member_cond.objectptr));
                    e.e = CTool_EndianConvertWord32(ea->data.destroy_member_cond.offset);
                    e.f = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member_cond.dtor, offset + 14);
                }
                break;
            }

            case EAT_DESTROYMEMBERARRAY: {
                reg = OBJECT_REG(ea->data.destroy_member_array.objectptr);
                if (reg || local_is_16bit_offset(ea->data.destroy_member_array.objectptr)) {
                    AABCCCC e;
                    e.a = flag26 | 9;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.destroy_member_array.objectptr));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_member_array.offset);
                    e.e = CTool_EndianConvertWord32(ea->data.destroy_member_array.elements);
                    e.f = CTool_EndianConvertWord32(ea->data.destroy_member_array.element_size);
                    e.g = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member_array.dtor, offset + 16);
                } else {
                    AACCCCC e;
                    e.a = flag26 | 0x18;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.destroy_member_array.objectptr));
                    e.d = CTool_EndianConvertWord32(ea->data.destroy_member_array.offset);
                    e.e = CTool_EndianConvertWord32(ea->data.destroy_member_array.elements);
                    e.f = CTool_EndianConvertWord32(ea->data.destroy_member_array.element_size);
                    e.g = 0; // dtor, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.destroy_member_array.dtor, offset + 18);
                }
                break;
            }

            case EAT_DELETEPOINTER:
            case EAT_DELETELOCALPOINTER: {
                reg = OBJECT_REG(ea->data.delete_pointer.pointerobject);
                if (reg || local_is_16bit_offset(ea->data.delete_pointer.pointerobject)) {
                    AABC e;
                    e.a = flag26 | 0xA;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.delete_pointer.pointerobject));
                    e.d = 0; // delete function, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.delete_pointer.deletefunc, offset + 4);
                } else {
                    AACC e;
                    e.a = flag26 | 0x19;
                    e.b = (reg != 0) << 7;
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.delete_pointer.pointerobject));
                    e.d = 0; // delete function, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.delete_pointer.deletefunc, offset + 6);
                }
                break;
            }

            case EAT_DELETEPOINTERCOND: {
                reg = OBJECT_REG(ea->data.delete_pointer_cond.cond);
                reg2 = OBJECT_REG(ea->data.delete_pointer_cond.pointerobject);
                if (
                    (reg || local_is_16bit_offset(ea->data.delete_pointer_cond.cond)) &&
                    (reg2 || local_is_16bit_offset(ea->data.delete_pointer_cond.pointerobject))
                    )
                {
                    AABBC e;
                    e.a = flag26 | 0xB;
                    // bit 7: condition in register, bit 6: pointer in register
                    e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6);
                    e.c = CTool_EndianConvertWord16(reg ? reg : local_offset_16(ea->data.delete_pointer_cond.cond));
                    e.d = CTool_EndianConvertWord16(reg2 ? reg2 : local_offset_16(ea->data.delete_pointer_cond.pointerobject));
                    e.e = 0; // delete function, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.delete_pointer_cond.deletefunc, offset + 6);
                } else {
                    AACCC e;
                    e.a = flag26 | 0x1A;
                    e.b = ((reg ? 1 : 0) << 7) | ((reg2 ? 1 : 0) << 6);
                    e.c = CTool_EndianConvertWord32(reg ? reg : local_offset_32(ea->data.delete_pointer_cond.cond));
                    e.d = CTool_EndianConvertWord32(reg2 ? reg2 : local_offset_32(ea->data.delete_pointer_cond.pointerobject));
                    e.e = 0; // delete function, filled in by relocation
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                    addrelocation(ea->data.delete_pointer_cond.deletefunc, offset + 10);
                }
                break;
            }

            case EAT_CATCHBLOCK: {
                AACCC e;
                e.a = flag26 | 0x10;
                e.b = 0;
                e.c = 0; // catch type id, relocated below when present
                if (ea->data.catch_block.catch_label->pclabel)
                    e.d = CTool_EndianConvertWord32(ea->data.catch_block.catch_label->pclabel->block->codeOffset);
                else
                    e.d = 0;
                e.e = CTool_EndianConvertWord32(local_offset_16(ea->data.catch_block.catch_info_object));
                AppendGListData(&exceptmodule, &e, sizeof(e));
                if (ea->data.catch_block.catch_typeid)
                    addrelocation(ea->data.catch_block.catch_typeid, offset + 2);
                break;
            }

            case EAT_ACTIVECATCHBLOCK: {
                if (local_is_16bit_offset(ea->data.active_catch_block.catch_info_object)) {
                    AAB e;
                    e.a = flag26 | 0xD;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord16(local_offset_16(ea->data.active_catch_block.catch_info_object));
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                } else {
                    AAC e;
                    e.a = flag26 | 0x1B;
                    e.b = 0;
                    e.c = CTool_EndianConvertWord32(local_offset_32(ea->data.active_catch_block.catch_info_object));
                    AppendGListData(&exceptmodule, &e, sizeof(e));
                }
                break;
            }

            case EAT_SPECIFICATION: {
                AABCC e;
                int i;
                e.a = flag26 | 0xF;
                e.b = 0;
                e.c = CTool_EndianConvertWord16(ea->data.specification.unexp_ids);
                if (ea->data.specification.unexp_label->pclabel)
                    e.d = CTool_EndianConvertWord32(ea->data.specification.unexp_label->pclabel->block->codeOffset);
                else
                    e.d = 0; // never emit indeterminate bytes (matches EAT_CATCHBLOCK)
                e.e = CTool_EndianConvertWord32(local_offset_16(ea->data.specification.unexp_info_object));
                AppendGListData(&exceptmodule, &e, sizeof(e));
                // One relocated 32-bit slot per listed type id follows the
                // fixed 12-byte header.
                for (i = 0; i < ea->data.specification.unexp_ids; i++) {
                    addrelocation(ea->data.specification.unexp_id[i], 12 + i * 4 + offset);
                    AppendGListLong(&exceptmodule, 0);
                }
                break;
            }

            case EAT_TERMINATE: {
                AA e;
                e.a = flag26 | 0xE;
                e.b = 0;
                AppendGListData(&exceptmodule, &e, sizeof(e));
                break;
            }

            default:
                CError_FATAL(671);
        }

        node = node->prev;
    }

    // Stopped on a node that was already emitted: append a type-1 record
    // carrying its table position instead of duplicating the rest of the chain.
    if (node) {
        AAB e;
        e.a = 1;
        e.b = 0;
        e.c = CTool_EndianConvertWord16(node->xE);
        AppendGListData(&exceptmodule, &e, sizeof(e));
    }
}
// Computes the code offset of `instr`: the owning block's codeOffset plus
// 4 bytes for every instruction that precedes it in the prevPCode chain.
// Asserts that the result satisfies FITS_IN_USHORT.
static UInt32 findPC(PCode *instr) {
    UInt32 pc;
    PCode *scan;

    pc = instr->block->codeOffset;
    for (scan = instr->prevPCode; scan; scan = scan->prevPCode)
        pc += 4;

    CError_ASSERT(704, FITS_IN_USHORT(pc));
    return pc;
}
// Same computation as findPC() but without the 16-bit range assertion:
// block codeOffset plus 4 bytes per preceding instruction in the chain.
static UInt32 findPC_long(PCode *instr) {
    UInt32 pc;
    PCode *scan;

    pc = instr->block->codeOffset;
    for (scan = instr->prevPCode; scan; scan = scan->prevPCode)
        pc += 4;

    return pc;
}
// Resets all module state: empties every per-type DAG list and clears the
// action-record and relocation lists.
void initializeexceptiontables(void) {
    int type = 0;

    while (type < EAT_NACTIONS) {
        DAG[type] = NULL;
        type++;
    }

    last_pc_action = NULL;
    pc_actions = NULL;

    last_except_ref = NULL;
    except_refs = NULL;
}
// Walks the action chain (via ->prev) and counts how many objects referenced
// by the actions have a non-zero OBJECT_REG(). Action types not listed
// contribute nothing. The count matches the number of operands that
// noteexceptionactionregisters() writes for the same chain.
int countexceptionactionregisters(ExceptionAction *actions) {
    ExceptionAction *ea;
    int total = 0;

    for (ea = actions; ea; ea = ea->prev) {
        switch (ea->type) {
            case EAT_DESTROYLOCALCOND:
                total += (OBJECT_REG(ea->data.destroy_local_cond.cond) != 0);
                break;
            case EAT_DESTROYLOCALPOINTER:
                total += (OBJECT_REG(ea->data.destroy_local_pointer.pointer) != 0);
                break;
            case EAT_DESTROYMEMBER:
                total += (OBJECT_REG(ea->data.destroy_member.objectptr) != 0);
                break;
            case EAT_DESTROYBASE:
                total += (OBJECT_REG(ea->data.destroy_base.objectptr) != 0);
                break;
            case EAT_DESTROYMEMBERCOND:
                total += (OBJECT_REG(ea->data.destroy_member_cond.cond) != 0);
                total += (OBJECT_REG(ea->data.destroy_member_cond.objectptr) != 0);
                break;
            case EAT_DESTROYMEMBERARRAY:
                total += (OBJECT_REG(ea->data.destroy_member_array.objectptr) != 0);
                break;
            case EAT_DELETEPOINTER:
            case EAT_DELETELOCALPOINTER:
                total += (OBJECT_REG(ea->data.delete_pointer.pointerobject) != 0);
                break;
            case EAT_DELETEPOINTERCOND:
                total += (OBJECT_REG(ea->data.delete_pointer_cond.cond) != 0);
                total += (OBJECT_REG(ea->data.delete_pointer_cond.pointerobject) != 0);
                break;
        }
    }

    return total;
}
void noteexceptionactionregisters(ExceptionAction *actions, PCodeArg *ops) {
Object *obj;
int reg;
while (actions) {
switch (actions->type) {
case EAT_DESTROYLOCALCOND:
if ((reg = OBJECT_REG(obj = actions->data.destroy_local_cond.cond))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DESTROYLOCALPOINTER:
if ((reg = OBJECT_REG(obj = actions->data.destroy_local_pointer.pointer))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DESTROYMEMBER:
if ((reg = OBJECT_REG(obj = actions->data.destroy_member.objectptr))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DESTROYBASE:
if ((reg = OBJECT_REG(obj = actions->data.destroy_base.objectptr))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DESTROYMEMBERCOND:
if ((reg = OBJECT_REG(obj = actions->data.destroy_member_cond.cond))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
if ((reg = OBJECT_REG(obj = actions->data.destroy_member_cond.objectptr))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DESTROYMEMBERARRAY:
if ((reg = OBJECT_REG(obj = actions->data.destroy_member_array.objectptr))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DELETEPOINTER:
case EAT_DELETELOCALPOINTER:
if ((reg = OBJECT_REG(obj = actions->data.delete_pointer.pointerobject))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
case EAT_DELETEPOINTERCOND:
if ((reg = OBJECT_REG(obj = actions->data.delete_pointer_cond.cond))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
if ((reg = OBJECT_REG(obj = actions->data.delete_pointer_cond.pointerobject))) {
ops->kind = PCOp_REGISTER;
ops->arg = obj->u.var.info->rclass;
ops->data.reg.reg = reg;
ops->data.reg.effect = EffectRead | Effect8;
ops++;
}
break;
}
actions = actions->prev;
}
}
// Records that `actions` is the active exception-action chain at `instr`.
//
// Nothing is recorded when there are no actions and the previous record (if
// any) had none either. Otherwise a new PCAction covering just `instr` is
// appended to the pc_actions list, a fresh label is started, and a branch
// edge is added from the instruction's block to every catch / unexpected
// handler label in the chain that already has a pclabel.
void recordexceptionactions(PCode *instr, ExceptionAction *actions) {
    PCAction *record;
    ExceptionAction *ea;

    if (!actions) {
        if (!last_pc_action || !last_pc_action->actions)
            return;
    }

    record = lalloc(sizeof(PCAction));
    record->firstInstr = instr;
    record->lastInstr = instr;
    record->actions = actions;
    record->next = NULL;
    record->prev = last_pc_action;

    if (!last_pc_action)
        pc_actions = record;
    else
        last_pc_action->next = record;
    last_pc_action = record;

    branch_label(makepclabel());

    for (ea = actions; ea; ea = ea->prev) {
        if (ea->type == EAT_CATCHBLOCK && ea->data.catch_block.catch_label->pclabel)
            pcbranch(instr->block, ea->data.catch_block.catch_label->pclabel);
        else if (ea->type == EAT_SPECIFICATION && ea->data.specification.unexp_label->pclabel)
            pcbranch(instr->block, ea->data.specification.unexp_label->pclabel);
    }
}
// Unlinks `pca` from the pc_actions list. The record's own next/prev fields
// are left untouched, so a caller iterating the list may still follow
// pca->next afterwards. last_pc_action is not updated here.
static void deleteexceptionaction(PCAction *pca) {
    PCAction *before = pca->prev;
    PCAction *after = pca->next;

    if (before)
        before->next = after;
    else
        pc_actions = after;

    if (after)
        after->prev = before;
}
// Cleans up and compacts the pc_actions list before the table is emitted:
//   1. drops records whose first instruction lives in a deleted block;
//   2. assigns each record its EANode (NULL when it has no actions);
//   3. folds each record into the preceding kept record when both map to the
//      same node, extending that record's lastInstr;
//   4. drops the records that have no actions.
// Returns the number of records left in the list.
static int mergeexceptionactions(void) {
    PCAction *cur;
    PCAction *kept;
    int remaining;

    if (pc_actions == NULL)
        return 0;

    // Step 1: discard records belonging to deleted blocks.
    cur = pc_actions;
    while (cur) {
        if (cur->firstInstr->block->flags & fDeleted)
            deleteexceptionaction(cur);
        cur = cur->next;
    }

    if (pc_actions == NULL)
        return 0;

    // Step 2: map every record to its DAG node.
    for (cur = pc_actions; cur; cur = cur->next) {
        if (cur->actions)
            cur->node = makeEAnode(cur->actions);
        else
            cur->node = NULL;
    }

    // Step 3: merge runs of records that share a node.
    kept = pc_actions;
    cur = pc_actions->next;
    while (cur) {
        if (cur->node != kept->node) {
            kept = cur;
        } else {
            kept->lastInstr = cur->lastInstr;
            deleteexceptionaction(cur);
        }
        cur = cur->next;
    }

    // Step 4: remove action-less records and count the rest.
    remaining = 0;
    for (cur = pc_actions; cur; cur = cur->next) {
        if (cur->actions)
            remaining++;
        else
            deleteexceptionaction(cur);
    }

    return remaining;
}
// One 8-byte range entry of the exception table, as filled in by
// dumpexceptiontables().
typedef struct ExceptionThing {
// Written as findPC_long(firstInstr) + 4, endian-converted.
UInt32 x0;
// Number of instructions between first and last instruction of the range
// (byte distance / 4), endian-converted.
UInt16 x4;
// Table position (EANode.xE) of the first action record for this range.
UInt16 x6;
} ExceptionThing;
// Builds the exception table for `function` in exceptmodule and hands it,
// together with the collected relocations, to ObjGen_DeclareExceptionTables().
//
// Buffer layout produced here:
//   - two 16-bit header words (saved-register information and flags);
//   - one 8-byte ExceptionThing per remaining PCAction;
//   - a 32-bit zero;
//   - the action records appended by allocateactioninfo().
void dumpexceptiontables(Object *function, SInt32 codesize) {
    PCAction *pca;
    UInt32 insn_start;
    UInt32 insn_count;
    UInt16 *header;
    ExceptionThing *entry;
    int nranges;

    nranges = mergeexceptionactions();

    // Reserve header + range entries up front; they are filled in below once
    // the action records (and therefore every node's xE) are known.
    InitGList(&exceptmodule, 256);
    AppendGListNoData(&exceptmodule, 8 * nranges + 4);
    AppendGListLong(&exceptmodule, 0);

    for (pca = pc_actions; pca; pca = pca->next) {
        if (pca->node->count == 0 && pca->node->xE == 0)
            allocateactioninfo(pca->node);
    }

    // Taken only after the last append, so the buffer address is final.
    header = (UInt16 *) *exceptmodule.data;

    // First header word is computed the same way with or without AltiVec.
    header[0] =
        CTool_EndianConvertWord16(
            (used_nonvolatile_registers[RegClass_GPR] << 11) |
            ((used_nonvolatile_registers[RegClass_FPR] & 0x1F) << 6) |
            (((used_nonvolatile_registers[RegClass_CRFIELD] != 0) & 1) << 5) |
            ((dynamic_stack & 1) << 4) |
            8);

    if (copts.altivec_model && used_nonvolatile_registers[RegClass_VR]) {
        // NOTE(review): this bit is OR'd in after the endian conversion, so
        // it lands in a different byte if the conversion actually swaps —
        // confirm this is intended.
        header[0] |= 4;
        if (copts.altivec_vrsave)
            header[1] = CTool_EndianConvertWord16((used_nonvolatile_registers[RegClass_VR] << 11) | 0x400);
        else
            header[1] = CTool_EndianConvertWord16((used_nonvolatile_registers[RegClass_VR] & 0x1F) << 11);
    } else {
        header[1] = 0;
    }

    // Range entries start right after the two header words.
    entry = (ExceptionThing *) (header + 2);
    for (pca = pc_actions; pca; pca = pca->next, entry++) {
        insn_start = findPC_long(pca->firstInstr);
        insn_count = (findPC_long(pca->lastInstr) - insn_start) / 4;
        CError_ASSERT(1203, (insn_count & 0xFFFF0000) == 0);

        entry->x0 = CTool_EndianConvertWord32(insn_start + 4);
        entry->x4 = CTool_EndianConvertWord16(insn_count);
        entry->x6 = CTool_EndianConvertWord16(pca->node->xE);
    }

    LockGList(&exceptmodule);
    ObjGen_DeclareExceptionTables(function, codesize, *exceptmodule.data, exceptmodule.size, except_refs);
    FreeGList(&exceptmodule);
}

View File

@@ -0,0 +1,642 @@
#include "compiler/FunctionCalls.h"
#include "compiler/CError.h"
#include "compiler/CFunc.h"
#include "compiler/CMachine.h"
#include "compiler/CParser.h"
#include "compiler/CodeGen.h"
#include "compiler/CompilerTools.h"
#include "compiler/InstrSelection.h"
#include "compiler/Operands.h"
#include "compiler/PCode.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/Registers.h"
#include "compiler/StackFrame.h"
#include "compiler/StructMoves.h"
#include "compiler/types.h"
// Bit flags stored in ArgInfo.flags describing how one call argument is
// passed. An argument may carry more than one "pass" bit (analyze_arguments()
// sets both GPR and stack for arguments that straddle the last register).
enum {
// Argument (or its first part) goes in general-purpose register(s).
AIF_PassInGPR = 1,
// Argument goes in a floating-point register.
AIF_PassInFPR = 2,
// Argument (or its remainder) is written to the outgoing parameter area.
AIF_PassOnStack = 4,
// Value narrower than 4 bytes must be extended to 32 bits before passing.
AIF_ExtendTo32Bits = 8,
// Float is stored as a double even if its type is 4 bytes wide.
AIF_ForceDoublePrecision = 0x10,
// Argument goes in a vector register.
AIF_PassInVR = 0x20,
// All bits that select a passing location.
AIF_PassMask = AIF_PassInGPR | AIF_PassInFPR | AIF_PassOnStack | AIF_PassInVR
};
// Per-argument bookkeeping built by analyze_arguments() and consumed by
// pass_in_memory() / pass_in_register().
#ifdef __MWERKS__
#pragma options align=mac68k
#endif
typedef struct ArgInfo {
// Next argument, in call order.
struct ArgInfo *next;
// Expression that produces the argument value.
ENode *expr;
// Operand holding the value once the expression has been generated.
Operand opnd;
// Displacement in the outgoing parameter area as assigned by
// set_out_param_displ(); initialised to -1 by make_arginfo().
SInt32 offset;
// Assigned registers; -1 when not used. For 8-byte values gpr/gprHi hold
// the two halves (which one is the lower-numbered register depends on
// copts.littleendian).
short gpr;
short gprHi;
short fpr;
short vr;
// Non-zero when `opnd` already holds the generated value.
short evaluated;
// Combination of AIF_* flags.
short flags;
} ArgInfo;
#ifdef __MWERKS__
#pragma options align=reset
#endif
// Forward declaration: the definition appears later in this file.
static void branch_subroutine_indirect_ctr(Operand *addrOpnd, UInt32 *used_regs);
// Allocates a zeroed ArgInfo for `expr` with no location assigned yet:
// every register field and the stack offset are -1, flags are empty and the
// expression is marked as not evaluated.
static ArgInfo *make_arginfo(ENode *expr) {
    ArgInfo *arg;

    arg = lalloc(sizeof(ArgInfo));
    memclrw(arg, sizeof(ArgInfo));

    arg->expr = expr;
    arg->next = NULL;
    arg->flags = 0;
    arg->evaluated = 0;

    // -1 means "not assigned".
    arg->offset = -1;
    arg->gpr = arg->gprHi = -1;
    arg->fpr = arg->vr = -1;

    return arg;
}
// Decides, for every expression in `arg_expr`, whether it is passed in
// general-purpose, floating-point or vector registers and/or in the outgoing
// parameter area, and returns the resulting ArgInfo list in call order.
//
// Parameters:
//   funcref         - expression that is skipped if it appears in the list
//   arg_expr        - actual argument expressions
//   arg             - matching formal parameters; may be NULL, &elipsis or
//                     &oldstyle when no typed parameter is available
//   used_regs       - out: per register class, bitmask of registers assigned
//   resultHasFloats - out: set to 1 if any argument has a float type
//   has_varargs     - non-zero changes how vector arguments consume GPRs
//
// Side effect: calls update_out_param_size() with the final displacement.
static ArgInfo *analyze_arguments(ENode *funcref, ENodeList *arg_expr, FuncArg *arg, UInt32 *used_regs, Boolean *resultHasFloats, char has_varargs) {
ArgInfo *infos;
ArgInfo *info;
SInt32 displ;
SInt32 arg_size;
int gpr_counter;
int fpr_counter;
int vr_counter;
Type *type;
RegClass rclass;
Boolean spilledVectorFlag;
infos = NULL;
displ = 0;
// First candidate register of each class: GPR 3, FPR 1, VR 2. The checks
// below allow GPRs up to 10 and FPRs/VRs up to 13.
gpr_counter = 3;
fpr_counter = 1;
vr_counter = 2;
for (rclass = 0; rclass < RegClassMax; rclass++)
used_regs[rclass] = 0;
*resultHasFloats = 0;
while (arg_expr) {
// The function reference itself is not passed as an argument.
// NOTE(review): unlike the advance at the bottom of the loop, this
// dereferences `arg` without checking for NULL / &elipsis / &oldstyle —
// confirm callers guarantee a real FuncArg here.
if (arg_expr->node == funcref) {
arg_expr = arg_expr->next;
arg = arg->next;
continue;
}
type = arg_expr->node->rtype;
// Append a new ArgInfo to the result list.
if (infos) {
info->next = make_arginfo(arg_expr->node);
info = info->next;
} else {
infos = info = make_arginfo(arg_expr->node);
}
arg_size = 0;
if (IS_TYPE_VECTOR(type)) {
// Vectors matching the ellipsis always go to the stack; others take
// the next vector register while one is available.
if (arg == &elipsis) {
spilledVectorFlag = 1;
info->flags |= AIF_PassOnStack;
} else {
spilledVectorFlag = 0;
if (vr_counter <= 13) {
info->flags |= AIF_PassInVR;
info->vr = vr_counter;
used_regs[RegClass_VR] |= 1 << vr_counter;
} else {
spilledVectorFlag = 1;
info->flags |= AIF_PassOnStack;
}
}
if (has_varargs) {
// In a varargs call a vector also consumes four GPRs starting at a
// re-aligned GPR number, and always reserves 16 bytes of stack.
if (gpr_counter < 10) {
gpr_counter = ((gpr_counter - 2) & ~3) + 5;
if (arg == &elipsis && gpr_counter < 10) {
info->flags |= AIF_PassInGPR;
info->gpr = gpr_counter;
// Mask 0x7E0 keeps only the bits for GPRs 5..10.
used_regs[RegClass_GPR] |= (15 << gpr_counter) & 0x7E0;
}
gpr_counter += 4;
}
spilledVectorFlag = 1;
}
if (spilledVectorFlag)
arg_size = 16;
vr_counter++;
} else if (IS_TYPE_FLOAT(type)) {
*resultHasFloats = 1;
if (!arg || arg == &oldstyle) {
// No typed parameter: pass in an FPR if available, otherwise on the
// stack as a double; always accounts for 8 bytes / two GPRs.
if (fpr_counter <= 13) {
info->flags |= AIF_PassInFPR;
info->fpr = fpr_counter;
used_regs[RegClass_FPR] |= 1 << fpr_counter;
} else {
info->flags |= AIF_PassOnStack | AIF_ForceDoublePrecision;
}
arg_size = 8;
fpr_counter++;
gpr_counter += 2;
} else if (arg == &elipsis) {
// Variadic float: passed through GPRs (two of them) while they last;
// at GPR 10 it is flagged for both GPR and stack.
if (gpr_counter < 10) {
info->flags |= AIF_PassInGPR;
info->gpr = gpr_counter;
used_regs[RegClass_GPR] |= 3 << gpr_counter;
} else if (gpr_counter == 10) {
info->flags |= AIF_PassInGPR | AIF_PassOnStack | AIF_ForceDoublePrecision;
info->gpr = gpr_counter;
used_regs[RegClass_GPR] |= 3 << gpr_counter;
} else {
info->flags |= AIF_PassOnStack | AIF_ForceDoublePrecision;
}
arg_size = 8;
fpr_counter++;
gpr_counter += 2;
} else {
// Typed float parameter: FPR if available, else stack at its natural
// size; the matching GPR slots are skipped.
if (fpr_counter <= 13) {
info->flags |= AIF_PassInFPR;
info->fpr = fpr_counter;
used_regs[RegClass_FPR] |= 1 << fpr_counter;
} else {
info->flags |= AIF_PassOnStack;
}
if (type->size == 4) {
arg_size = 4;
gpr_counter++;
} else {
arg_size = 8;
gpr_counter += 2;
}
fpr_counter++;
}
} else if (TYPE_IS_8BYTES(type)) {
// 8-byte value: a pair of consecutive GPRs; which one holds the high
// half depends on copts.littleendian. If only GPR 10 is left, the second
// register number is 11 and is not marked as used.
if (gpr_counter <= 10) {
info->flags |= AIF_PassInGPR;
if (copts.littleendian) {
info->gpr = gpr_counter;
info->gprHi = gpr_counter + 1;
} else {
info->gpr = gpr_counter + 1;
info->gprHi = gpr_counter;
}
used_regs[RegClass_GPR] |= 1 << gpr_counter;
if ((gpr_counter + 1) <= 10)
used_regs[RegClass_GPR] |= 1 << (gpr_counter + 1);
} else {
info->flags |= AIF_PassOnStack;
}
arg_size = 8;
gpr_counter += 2;
} else if (TYPE_FITS_IN_REGISTER(type)) {
// Values narrower than 4 bytes without a typed parameter are widened.
if ((!arg || arg == &elipsis || arg == &oldstyle) && type->size < 4)
info->flags |= AIF_ExtendTo32Bits;
if (gpr_counter <= 10) {
info->flags |= AIF_PassInGPR;
info->gpr = gpr_counter;
used_regs[RegClass_GPR] |= 1 << gpr_counter;
} else {
info->flags |= AIF_PassOnStack;
}
arg_size = 4;
gpr_counter++;
} else if (IS_TYPE_ARRAY(type) || IS_TYPE_NONVECTOR_STRUCT(type) || IS_TYPE_CLASS(type) ||
IS_TYPE_12BYTES_MEMBERPOINTER(type)) {
// Aggregates: size rounded up to whole 4-byte words, one GPR per word.
SInt32 gprs_needed = (type->size >> 2) + ((type->size & 3) != 0);
if (gpr_counter <= 10) {
if ((gpr_counter + gprs_needed - 1) <= 10) {
// Fits entirely in the remaining GPRs.
info->flags |= AIF_PassInGPR;
info->gpr = gpr_counter;
used_regs[RegClass_GPR] |= ((1 << gprs_needed) - 1) << gpr_counter;
} else {
// Straddles GPR 10: flagged for both the remaining GPRs and the stack.
info->flags |= AIF_PassInGPR | AIF_PassOnStack;
info->gpr = gpr_counter;
used_regs[RegClass_GPR] |= ((1 << (11 - gpr_counter)) - 1) << gpr_counter;
}
} else {
info->flags |= AIF_PassOnStack;
}
gpr_counter += gprs_needed;
arg_size = type->size;
} else {
CError_FATAL(421);
}
// Reserve the argument's slot in the outgoing parameter area and advance.
displ = set_out_param_displ(displ, type, info->flags & AIF_PassOnStack, &info->offset, arg_size);
arg_expr = arg_expr->next;
// Sentinel parameters (&elipsis, &oldstyle) and NULL stay in place for all
// remaining arguments.
if (arg && arg != &elipsis && arg != &oldstyle)
arg = arg->next;
}
update_out_param_size(displ);
return infos;
}
// Stores one argument into its slot in the outgoing parameter area
// (addressed relative to register 1 at out_param_displ_to_offset(offset)).
// The expression is generated first unless info->evaluated is already set.
//
//   register-sized, 8 bytes : two STW stores (low and high word)
//   register-sized, other   : optional 32-bit extension, then one STW
//   float                   : STFS for 4-byte floats unless double precision
//                             is forced, otherwise STFD
//   vector                  : STVX
//   anything else           : block copy of type->size bytes
static void pass_in_memory(ArgInfo *info) {
    Type *type;
    Operand dest;

    type = info->expr->rtype;
    memclrw(&dest, sizeof(Operand));

    if (TYPE_FITS_IN_REGISTER(type)) {
        if (TYPE_IS_8BYTES(type)) {
            if (!info->evaluated)
                GEN_NODE(info->expr, &info->opnd);
            coerce_to_register_pair(&info->opnd, type, 0, 0);

            load_store_register(
                PC_STW, info->opnd.reg, 1,
                NULL, low_offset + out_param_displ_to_offset(info->offset));
            load_store_register(
                PC_STW, info->opnd.regHi, 1,
                NULL, high_offset + out_param_displ_to_offset(info->offset));
            return;
        }

        if (!info->evaluated)
            GEN_NODE(info->expr, &info->opnd);
        if (info->flags & AIF_ExtendTo32Bits)
            extend32(&info->opnd, type, 0);
        ENSURE_GPR(&info->opnd, type, 0);

        load_store_register(
            PC_STW, info->opnd.reg, 1,
            NULL, out_param_displ_to_offset(info->offset));
        return;
    }

    if (IS_TYPE_FLOAT(type)) {
        if (!info->evaluated)
            GEN_NODE(info->expr, &info->opnd);
        ENSURE_FPR(&info->opnd, type, 0);

        // Single-precision store only for 4-byte floats that are not forced
        // to double precision.
        load_store_register(
            (type->size == 4 && !(info->flags & AIF_ForceDoublePrecision)) ? PC_STFS : PC_STFD,
            info->opnd.reg, 1,
            NULL, out_param_displ_to_offset(info->offset));
        return;
    }

    if (IS_TYPE_VECTOR(type)) {
        if (!info->evaluated)
            GEN_NODE(info->expr, &info->opnd);
        ENSURE_VR(&info->opnd, type, 0);

        load_store_register(
            PC_STVX, info->opnd.reg, 1,
            NULL, out_param_displ_to_offset(info->offset));
        return;
    }

    // Aggregate: describe the destination slot, then copy the value into it.
    dest.optype = OpndType_IndirectGPR_ImmOffset;
    dest.reg = 1;
    dest.object = NULL;
    dest.immOffset = out_param_displ_to_offset(info->offset);

    if (!info->evaluated)
        GEN_NODE(info->expr, &info->opnd);
    move_block(&dest, &info->opnd, type->size, CMach_ArgumentAlignment(type));
}
// Emits code that places one outgoing call argument in the register(s) chosen
// for it in `info` (info->fpr, info->vr, or info->gpr/info->gprHi). If the
// argument expression has not been evaluated yet, code for it is generated first.
static void pass_in_register(ArgInfo *info) {
Type *type;
type = info->expr->rtype;
if ((info->flags & AIF_PassMask) == AIF_PassInFPR) {
// Floating-point register argument: evaluate towards info->fpr and copy
// with FMR if the value ended up elsewhere.
if (!info->evaluated)
GEN_NODE_TO_REG(info->expr, info->fpr, 0, &info->opnd);
ENSURE_FPR(&info->opnd, type, info->fpr);
if (info->opnd.reg != info->fpr)
emitpcode(PC_FMR, info->fpr, info->opnd.reg);
} else if ((info->flags & AIF_PassMask) == AIF_PassInVR) {
// Vector register argument: same pattern using VMR.
if (!info->evaluated)
GEN_NODE_TO_REG(info->expr, info->vr, 0, &info->opnd);
ENSURE_VR(&info->opnd, type, info->vr);
if (info->opnd.reg != info->vr)
emitpcode(PC_VMR, info->vr, info->opnd.reg);
} else if (TYPE_FITS_IN_REGISTER(type)) {
if (TYPE_IS_8BYTES(type)) {
if (!info->evaluated)
GEN_NODE_TO_REG(info->expr, info->gpr, info->gprHi, &info->opnd);
coerce_to_register_pair(&info->opnd, type, info->gpr, info->gprHi);
// When one half of the pair was assigned a register number above 10,
// that half is written to the outgoing parameter area instead.
// NOTE(review): 10 looks like the last argument GPR - confirm.
if (copts.littleendian) {
if (info->gprHi > 10) {
load_store_register(
PC_STW, info->opnd.regHi, 1,
NULL, high_offset + out_param_displ_to_offset(info->offset));
}
} else {
if (info->gpr > 10) {
load_store_register(
PC_STW, info->opnd.reg, 1,
NULL, low_offset + out_param_displ_to_offset(info->offset));
}
}
} else {
// Single GPR argument: optionally widen, then move into info->gpr.
if (!info->evaluated)
GEN_NODE_TO_REG(info->expr, info->gpr, 0, &info->opnd);
if (info->flags & AIF_ExtendTo32Bits)
extend32(&info->opnd, type, info->gpr);
ENSURE_GPR(&info->opnd, type, info->gpr);
if (info->opnd.reg != info->gpr)
emitpcode(PC_MR, info->gpr, info->opnd.reg);
}
} else if (IS_TYPE_FLOAT(type)) {
// Float passed in GPRs: two words are loaded into info->gpr and info->gpr + 1.
if (!info->evaluated)
GEN_NODE(info->expr, &info->opnd);
if (type->size != 4 && info->opnd.optype == OpndType_IndirectGPR_ImmOffset) {
// Value already lives in memory: load both words straight from it.
load_store_register(
PC_LWZ, info->gpr, info->opnd.reg,
info->opnd.object, info->opnd.immOffset);
load_store_register(
PC_LWZ, info->gpr + 1, info->opnd.reg,
info->opnd.object, info->opnd.immOffset + 4);
} else {
// Otherwise bounce through memory: store the FPR as a double into the
// argument's parameter slot, then reload the two words into the GPRs.
ENSURE_FPR(&info->opnd, type, 0);
load_store_register(
PC_STFD, info->opnd.reg, 1,
NULL, out_param_displ_to_offset(info->offset));
load_store_register(
PC_LWZ, info->gpr, 1,
NULL, out_param_displ_to_offset(info->offset));
load_store_register(
PC_LWZ, info->gpr + 1, 1,
NULL, out_param_displ_to_offset(info->offset) + 4);
}
} else if (IS_TYPE_VECTOR(type)) {
// Vector passed in GPRs: the first two words always go to info->gpr and
// info->gpr + 1; the last two are loaded only when (info->gpr + 2) < 10.
if (!info->evaluated)
GEN_NODE(info->expr, &info->opnd);
if (info->opnd.optype == OpndType_IndirectGPR_ImmOffset) {
load_store_register(
PC_LWZ, info->gpr, info->opnd.reg,
info->opnd.object, info->opnd.immOffset);
load_store_register(
PC_LWZ, info->gpr + 1, info->opnd.reg,
info->opnd.object, info->opnd.immOffset + 4);
if ((info->gpr + 2) < 10) {
load_store_register(
PC_LWZ, info->gpr + 2, info->opnd.reg,
info->opnd.object, info->opnd.immOffset + 8);
load_store_register(
PC_LWZ, info->gpr + 3, info->opnd.reg,
info->opnd.object, info->opnd.immOffset + 12);
}
} else {
// Value is in a vector register: store it to the parameter slot with
// STVX and reload the words from there.
ENSURE_VR(&info->opnd, type, 0);
load_store_register(
PC_STVX, info->opnd.reg, 1,
NULL, out_param_displ_to_offset(info->offset));
load_store_register(
PC_LWZ, info->gpr, 1,
NULL, out_param_displ_to_offset(info->offset));
load_store_register(
PC_LWZ, info->gpr + 1, 1,
NULL, out_param_displ_to_offset(info->offset) + 4);
if ((info->gpr + 2) < 10) {
load_store_register(
PC_LWZ, info->gpr + 2, 1,
NULL, out_param_displ_to_offset(info->offset) + 8);
load_store_register(
PC_LWZ, info->gpr + 3, 1,
NULL, out_param_displ_to_offset(info->offset) + 12);
}
}
} else {
// Remaining types are loaded from memory word by word into consecutive GPRs.
if (!info->evaluated)
GEN_NODE(info->expr, &info->opnd);
if (type->size <= 4) {
// Fits in one word: a single LWZ or LWZX depending on the operand form.
if (info->opnd.optype == OpndType_IndirectSymbol)
coerce_to_addressable(&info->opnd);
if (info->opnd.optype == OpndType_IndirectGPR_ImmOffset) {
load_store_register(
PC_LWZ, info->gpr, info->opnd.reg,
info->opnd.object, info->opnd.immOffset);
} else if (info->opnd.optype == OpndType_IndirectGPR_Indexed) {
emitpcode(
PC_LWZX, info->gpr, info->opnd.reg,
info->opnd.regOffset);
}
} else {
// Number of 4-byte words needed, rounding the size up.
SInt32 gprs_needed = (type->size >> 2) + ((type->size & 3) != 0);
SInt32 i;
make_addressable(&info->opnd, gprs_needed * 4, 12);
// Load every word except the one whose destination is the base
// register itself, so the address is not overwritten mid-sequence.
for (i = 0; i < gprs_needed; i++) {
if (info->opnd.reg != (info->gpr + i)) {
load_store_register(
PC_LWZ, info->gpr + i, info->opnd.reg,
info->opnd.object, info->opnd.immOffset + i * 4);
}
}
// If the base register is one of the destinations, load its word last.
if (info->opnd.reg >= info->gpr && info->opnd.reg < (info->gpr + gprs_needed)) {
load_store_register(
PC_LWZ, info->opnd.reg, info->opnd.reg,
info->opnd.object, info->opnd.immOffset + (info->opnd.reg - info->gpr) * 4);
}
}
}
}
// For an argument that is passed both on the stack and in GPRs: reloads the
// words already written to the outgoing parameter area into consecutive GPRs,
// starting at info->gpr and stopping after register 10 or at the end of the value.
static void pass_in_register_and_memory(ArgInfo *info) {
    Type *argType = info->expr->rtype;
    int reg;
    SInt32 pos;

    for (reg = info->gpr, pos = 0; pos < argType->size && reg <= 10; reg++, pos += 4) {
        load_store_register(
            PC_LWZ, reg, 1,
            NULL, pos + out_param_displ_to_offset(info->offset));
    }
}
// Always returns 0; `func` is ignored. The result is passed by call_function()
// as the second argument (add_nop) of branch_subroutine().
static Boolean needs_TOC_reload(Object *func) {
return 0;
}
// Emits the loads that fetch a virtual function pointer into GPR 12.
//   tclass - class whose vtable pointer offset (tclass->vtable->offset) is used
//   offset - offset of the function entry within the vtable
//   reg    - register holding the object pointer
//   opnd   - receives a GPR operand describing register 12
// For CLASS_HANDLEOBJECT classes the object pointer is dereferenced once more
// before the vtable pointer is read.
static void load_virtual_function(TypeClass *tclass, SInt32 offset, int reg, Operand *opnd) {
    int vtableBase = reg;

    if (tclass->flags & CLASS_HANDLEOBJECT) {
        load_store_register(PC_LWZ, 12, reg, NULL, 0);
        vtableBase = 12;
    }

    // vtable pointer, then the function entry inside the vtable
    load_store_register(PC_LWZ, 12, vtableBase, NULL, tclass->vtable->offset);
    load_store_register(PC_LWZ, 12, 12, NULL, offset);

    opnd->optype = OpndType_GPR;
    opnd->reg = 12;
}
// Calls `func` via branch_subroutine() after making sure the address operand
// is in GPR 12, and records GPR 12 as used in the GPR mask of `used_regs`.
static void branch_subroutine_indirect(Object *func, Operand *addrOpnd, UInt32 *used_regs) {
    if (addrOpnd->reg != 12) {
        emitpcode(PC_MR, 12, addrOpnd->reg);
    }

    used_regs[RegClass_GPR] |= (1 << 12);
    branch_subroutine(func, 1, used_regs);
}
// Pre-evaluates every argument whose expression contains a call (hascall).
// The recursion visits later arguments first, so code for the last such
// argument in the list is generated before code for earlier ones. Each
// argument handled here is marked `evaluated`.
static void evaluate_nested_function_calls(ArgInfo *info) {
    ArgInfo *later;

    // Recurse into the next argument (if any) that contains a call.
    for (later = info->next; later; later = later->next) {
        if (later->expr->hascall) {
            evaluate_nested_function_calls(later);
            break;
        }
    }

    if (!info->expr->hascall)
        return;

    GEN_NODE(info->expr, &info->opnd);
    info->evaluated = 1;
}
// Generates code for a function call expression.
//   expr   - the call node (callee in data.funccall.funcref, arguments in
//            data.funccall.args, callee type in data.funccall.functype)
//   output - receives an operand describing where the call's result lives
// Steps: analyse the arguments, pre-evaluate any that contain nested calls,
// place arguments in memory and registers, emit the branch, then copy the
// result out of the return register into a fresh virtual register.
void call_function(ENode *expr, Operand *output) {
ArgInfo *infos; // r31
ENode *funcref = expr->data.funccall.funcref; // r27
Type *resultType = expr->data.funccall.functype->functype; // r26
ENode *node = NULL; // r25
char has_varargs; // r24
ArgInfo *info; // r22
Operand opnd;
UInt32 used_regs[RegClassMax] = {0};
Boolean has_floats;
FuncArg *arg;
memclrw(&opnd, sizeof(Operand));
// The callee is variadic when its parameter list contains the `elipsis` sentinel.
has_varargs = 0;
for (arg = expr->data.funccall.functype->args; arg; arg = arg->next) {
if (arg == &elipsis) {
has_varargs = 1;
break;
}
}
// For FUNC_FLAGS_80 functions, `node` is taken from the argument list: the
// second argument when the result is returned through a hidden argument,
// otherwise the first. Further down it is evaluated into GPR 12 and used as
// the address operand of branch_subroutine_indirect().
if (expr->data.funccall.functype->flags & FUNC_FLAGS_80) {
if (CMach_PassResultInHiddenArg(resultType))
node = expr->data.funccall.args->next->node;
else
node = expr->data.funccall.args->node;
}
infos = analyze_arguments(
node,
expr->data.funccall.args,
expr->data.funccall.functype->args,
used_regs,
&has_floats,
has_varargs);
// Arguments that themselves contain calls are evaluated up front.
if (infos)
evaluate_nested_function_calls(infos);
// Likewise, a callee expression (or `node`) containing a call is evaluated
// now, before the argument registers are populated.
if (funcref->hascall) {
GEN_NODE_TO_GPR(funcref, &opnd, TYPE(&void_ptr), 0);
} else if (node && node->hascall) {
GEN_NODE_TO_GPR(node, &opnd, TYPE(&void_ptr), 0);
}
// Pass 1: everything flagged AIF_PassOnStack is stored to memory.
for (info = infos; info; info = info->next) {
if (info->flags & AIF_PassOnStack)
pass_in_memory(info);
}
// Pass 2: arguments passed both in GPRs and on the stack are reloaded from
// the stack into their GPRs.
for (info = infos; info; info = info->next) {
if ((info->flags & AIF_PassMask) == (AIF_PassInGPR | AIF_PassOnStack))
pass_in_register_and_memory(info);
}
// Pass 3: register-only arguments.
for (info = infos; info; info = info->next) {
int flag = info->flags & AIF_PassMask;
if (
flag == AIF_PassInGPR ||
flag == AIF_PassInFPR ||
flag == AIF_PassInVR
)
pass_in_register(info);
}
if (funcref->type == EOBJREF) {
TypeClass *tclass;
SInt32 vfOffset;
if (CParser_IsVirtualFunction(funcref->data.objref, &tclass, &vfOffset)) {
// Virtual call: the object pointer is read from Register4 when the
// result uses a hidden argument, else from Register3.
load_virtual_function(
tclass,
vfOffset,
CMach_PassResultInHiddenArg(resultType) ? Register4 : Register3,
&opnd
);
branch_subroutine_indirect_ctr(&opnd, used_regs);
} else if (node) {
// `node` supplies the address operand, evaluated into GPR 12 here
// unless it was already evaluated above because it contains a call.
if (!node->hascall) {
GEN_NODE_TO_REG(node, 12, 0, &opnd);
ENSURE_GPR(&opnd, TYPE(&void_ptr), 12);
}
branch_subroutine_indirect(funcref->data.objref, &opnd, used_regs);
} else {
// Plain direct call.
branch_subroutine(funcref->data.objref, needs_TOC_reload(funcref->data.objref), used_regs);
}
} else {
// Callee is an arbitrary expression: evaluate it into GPR 12 and call
// through the count register.
if (!funcref->hascall)
GEN_NODE_TO_REG(funcref, 12, 0, &opnd);
ENSURE_GPR(&opnd, TYPE(&void_ptr), 12);
branch_subroutine_indirect_ctr(&opnd, used_regs);
}
// Copy the result out of the fixed return register into a new virtual register.
if (IS_TYPE_FLOAT(resultType)) {
output->optype = OpndType_FPR;
output->reg = used_virtual_registers[RegClass_FPR]++;
emitpcode(PC_FMR, output->reg, 1);
} else if (IS_TYPE_VECTOR(resultType)) {
output->optype = OpndType_VR;
output->reg = used_virtual_registers[RegClass_VR]++;
emitpcode(PC_VMR, output->reg, 2);
} else if (TYPE_FITS_IN_REGISTER(resultType)) {
if (resultType->size > 4) {
output->optype = OpndType_GPRPair;
output->reg = used_virtual_registers[RegClass_GPR]++;
output->regHi = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_MR, output->reg, low_reg);
emitpcode(PC_MR, output->regHi, high_reg);
} else {
output->optype = OpndType_GPR;
output->reg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_MR, output->reg, 3);
}
} else {
// No register result: report an absolute zero operand.
output->optype = OpndType_Absolute;
output->immediate = 0;
}
}
// Emits an indirect call through the count register: the target address is
// moved into GPR 12 if it is not already there, copied to CTR with MTCTR, and
// the call itself is emitted by branch_subroutine_ctr(). GPR 12 is recorded as
// used in the GPR mask of `used_regs`.
static void branch_subroutine_indirect_ctr(Operand *addrOpnd, UInt32 *used_regs) {
    if (addrOpnd->reg != 12) {
        emitpcode(PC_MR, 12, addrOpnd->reg);
    }
    emitpcode(PC_MTCTR, 12);

    used_regs[RegClass_GPR] |= (1 << 12);
    branch_subroutine_ctr(used_regs);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,536 @@
#include "compiler/PCodeListing.h"
#include "compiler/CError.h"
#include "compiler/CMangler.h"
#include "compiler/CParser.h"
#include "compiler/Alias.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/InterferenceGraph.h"
#include "compiler/LiveInfo.h"
#include "compiler/PCode.h"
#include "compiler/PCodeAssembly.h"
#include "compiler/Registers.h"
#include "compiler/Scheduler.h"
#include "compiler/objects.h"
// Output stream that the listing functions in this file print to.
static FILE *pcfile;
static int ptime;
static int sourcetext;
static int sourcetext_is_main;
static int sourcelength;
// When non-zero, pclistblock() and pclistblocks_end_scheduler() raise a fatal
// internal error after listing.
int pclist_bad_operand;
// Prints one data-flow bit vector to pcfile as "name = {a,b,c}".
//   name   - label printed before the set
//   vec    - bit vector to dump
//   size   - number of bits to examine
//   format - printf format used for each set bit's index
// A line break is inserted after the counter reaches 10 entries.
static void formatdataflowset(char *name, UInt32 *vec, UInt32 size, char *format) {
    UInt32 i;
    UInt32 counter;
    char *separator;

    separator = "";
    fprintf(pcfile, "%s = {", name);
    for (i = 0, counter = 0; i < size; i++) {
        if (bitvectorgetbit(i, vec)) {
            // The separator is data, not a format string, so write it with
            // fputs rather than passing it as the format argument.
            if (i)
                fputs(separator, pcfile);
            if (counter++ == 10) {
                fprintf(pcfile, "\n\t\t");
                counter = 0;
            }
            // `format` is supplied by the caller and is expected to consume
            // exactly one integer argument.
            fprintf(pcfile, format, i);
            separator = ",";
        }
    }
    fprintf(pcfile, "}\n");
}
// Writes a listing of one basic block to pcfile: a header line (flags and loop
// weight), the block's successors, predecessors and labels, one line per
// instruction, and - when vecSize is non-zero - the block's use/def/in/out
// liveness sets formatted with `format`.
static void pclistblock(PCodeBlock *block, char *format, UInt32 vecSize) {
PCLink *link;
PCodeLabel *label;
int cpu;
int chr;
PCode *instr;
int offset;
int latency;
UInt32 opcode;
MachineInfo *mi;
char buf[500];
WeirdOperand dummyArg;
fprintf(pcfile, ":{%4.4x}::::::::::::::::::::::::::::::::::::::::LOOPWEIGHT=%" PRId32 "\n", block->flags, block->loopWeight);
fprintf(pcfile, "B%" PRId32 ": ", block->blockIndex);
fprintf(pcfile, "Successors = { ");
for (link = block->successors; link; link = link->nextLink) {
if (link->block)
fprintf(pcfile, "B%" PRId32 " ", link->block->blockIndex);
}
fprintf(pcfile, "} ");
fprintf(pcfile, "Predecessors = { ");
for (link = block->predecessors; link; link = link->nextLink) {
if (link->block)
fprintf(pcfile, "B%" PRId32 " ", link->block->blockIndex);
}
// The brace opened by "Predecessors" is closed either by the "} Labels"
// text below or by the final "}\n\n".
if (block->labels) {
fprintf(pcfile, "} Labels = { ");
for (label = block->labels; label; label = label->nextLabel)
fprintf(pcfile, "L%" PRId32 " ", label->index);
}
fprintf(pcfile, "}\n\n");
// Select the machine description whose latency() is reported per instruction,
// based on copts.scheduling (and copts.altivec_model).
cpu = copts.scheduling;
if (cpu == 10) {
mi = &machine7450;
} else if (copts.altivec_model != 0 || cpu == 7) {
mi = &machine7400;
} else if (cpu == 2) {
mi = &machine603;
} else if (cpu == 5) {
mi = &machine603e;
} else if (cpu == 3) {
mi = &machine604;
} else if (cpu == 6) {
mi = &machine604;
} else if (cpu == 4) {
mi = &machine750;
} else if (cpu == 1) {
mi = &machine601;
} else if (cpu == 9) {
mi = &machine821;
} else {
mi = &machine603;
}
// One line per instruction; the offset advances by 4 for each instruction.
for (offset = block->codeOffset, instr = block->firstPCode; instr; instr = instr->nextPCode, offset += 4) {
latency = mi->latency(instr);
formatoperands(instr, buf, 1);
// '.' marks instructions that have the record bit set.
chr = (PCODE_FLAG_SET_F(instr) & fRecordBit) ? '.' : ' ';
// While `coloring` is set the instruction is not assembled and the opcode
// word is printed as 0.
if (coloring)
opcode = 0;
else
opcode = assemblepcode(instr, offset, &dummyArg);
fprintf(
pcfile,
" %.8" PRIX32 " %.8" PRIX32 " %4" PRId32 " %-7s%c %s\n",
offset, CTool_EndianConvertWord32(opcode), latency,
opcodeinfo[instr->op].name, chr, buf
);
if (instr->alias)
dumpalias(instr->alias, 0, 1, 0);
}
if (vecSize) {
fprintf(pcfile, "............................................................\n");
formatdataflowset("use", liveinfo[block->blockIndex].use, vecSize, format);
formatdataflowset("def", liveinfo[block->blockIndex].def, vecSize, format);
formatdataflowset("in ", liveinfo[block->blockIndex].in, vecSize, format);
formatdataflowset("out", liveinfo[block->blockIndex].out, vecSize, format);
}
fflush(pcfile);
if (pclist_bad_operand)
CError_FATAL(252);
}
// Writes "On" or "Off" followed by a newline to pcfile, depending on `flag`.
static void pclistonoff(int flag) {
    fputs(flag ? "On\n" : "Off\n", pcfile);
}
// Stub: does nothing. The argument list has not been reconstructed.
void pcinitlisting() {
// unknown args, etc
}
// Closes the listing file if one is open. Compiles to an empty function
// unless CW_ENABLE_PCODE_DEBUG is defined.
void pccleanuplisting(void) {
#ifdef CW_ENABLE_PCODE_DEBUG
// this code is not based on the original as we don't have it
if (pcfile) {
fclose(pcfile);
// Reset so a later pclistblocks() call reopens the file.
pcfile = NULL;
}
#endif
}
// Appends a listing of every basic block to "pcdump.txt", preceded by the two
// caption lines `name1` and `name2`. Only active when CW_ENABLE_PCODE_DEBUG is
// defined and copts.debuglisting is set. The file is opened on first use and
// kept open until pccleanuplisting() is called.
void pclistblocks(char *name1, char *name2) {
#ifdef CW_ENABLE_PCODE_DEBUG
    // this code is not based on the original as we don't have it
    PCodeBlock *block;

    if (!copts.debuglisting)
        return;

    if (!pcfile) {
        pcfile = fopen("pcdump.txt", "a");
        // The listing is a debugging aid: if the file cannot be opened, skip
        // the dump instead of writing through a NULL stream.
        if (!pcfile)
            return;
    }

    fprintf(pcfile, "\n%s\n%s\n", name1, name2);
    for (block = pcbasicblocks; block; block = block->nextBlock)
        pclistblock(block, NULL, 0);
#endif
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistdataflow() {
// unknown args
}
// Stub: the body is empty, so both arguments are ignored.
void pclistinterferences(char *class_format, int regcount) {
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistspill() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistcopypropitem() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistcoalesce() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistusedefs() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistpropinfo() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
static void listloop() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
static void listloops() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistloops() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
static void listswitchtables() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistswitchtables() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistdominators() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistbackedge() {
// unknown args
}
// Builds a textual description of an interference-graph node's flags.
// Returns a 512-byte buffer obtained from oalloc() holding the names of all
// set flags joined with '|', or "no_flags" when none of the known flags is set.
static char *GetInterferenceFlags(IGNode *node) {
    static const struct {
        int mask;
        const char *label;
    } flagNames[] = {
        { fSpilled, "fSpilled" },
        { fPushed, "fPushed" },
        { fCoalesced, "fCoalesced" },
        { fCoalescedInto, "fCoalescedInto" },
        { fPairHigh, "fPairHigh" },
        { fPairLow, "fPairLow" }
    };
    char *text;
    Boolean needBar;
    unsigned int k;

    needBar = 0;
    text = oalloc(512);
    text[0] = 0;

    for (k = 0; k < sizeof(flagNames) / sizeof(flagNames[0]); k++) {
        if (!(node->flags & flagNames[k].mask))
            continue;
        if (needBar)
            strcat(text, "|");
        strcat(text, flagNames[k].label);
        needBar = 1;
    }

    if (text[0] == 0)
        strcat(text, "no_flags");
    return text;
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistinterferencegraphnode() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistinterferencegraph() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pclistblock_scheduler() {
// unknown args
}
// Stub: the body is empty, so both arguments are ignored.
void pclistblocks_start_scheduler(char *str1, char *str2) {
}
// Raises fatal internal error 1318 if pclist_bad_operand is set; otherwise
// does nothing.
void pclistblocks_end_scheduler(void) {
if (pclist_bad_operand)
CError_FATAL(1318);
}
// Stub: does nothing. The argument list has not been reconstructed.
static void printheapsize() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pctotalheap() {
// unknown args
}
// Stub: does nothing. The argument list has not been reconstructed.
void pctotalmemory() {
// unknown args
}
// Stub: the body is empty, so the first argument and any variadic arguments
// are ignored.
void pcmessage(char *probably_a_string, ...) {
}
// Formats a short description of `alias` into `buf`.
//   alias   - alias to describe
//   buf     - destination buffer
//   bufSize - capacity of `buf` in bytes
// Returns the number of characters written, excluding the terminating NUL.
// When the buffer is too small for the description, "..." is written instead.
//
// AliasType0/AliasType1 print the object's link name plus a storage tag
// ({NL}, {RW}, {SP} or nothing); AliasType1 additionally appends
// ":size", "+offset:size" or "-offset:size". AliasType2 prints "{*}".
int formatalias(Alias *alias, char *buf, int bufSize) {
    char *name;
    char *typestr;
    int len;
    int len2;
    int room;

    if (bufSize < 16)
        return sprintf(buf, "...");

    switch (alias->type) {
        case AliasType0:
        case AliasType1:
            name = CMangler_GetLinkName(alias->object)->name;
            if (!strlen(name) || name[0] < 0)
                CError_FATAL(1458);
            // bufSize is at least 16 here, so the cast cannot wrap.
            if (strlen(name) + 16 > (size_t) bufSize)
                return sprintf(buf, "...");

            switch (alias->object->datatype) {
                case DNONLAZYPTR:
                    typestr = "{NL}";
                    break;
                case DDATA:
                    typestr = "{RW}";
                    break;
                case DLOCAL:
                    typestr = "{SP}";
                    break;
                default:
                    typestr = "";
                    break;
            }

            // The name is truncated to bufSize - 20 characters. The '0' flag
            // is not defined for %s and the format consumes no third value,
            // so neither is passed.
            len = sprintf(buf, "%.*s%s", bufSize - 20, name, typestr);
            buf += len;
            if (alias->type == AliasType0)
                return len;

            // Bound the suffix to the space left in the buffer: large offsets
            // and sizes can otherwise run past the end.
            room = bufSize - len;
            if (alias->offset == 0)
                len2 = snprintf(buf, room, ":%d", alias->size);
            else if (alias->offset > 0)
                len2 = snprintf(buf, room, "+%d:%d", alias->offset, alias->size);
            else
                len2 = snprintf(buf, room, "-%d:%d", -alias->offset, alias->size);

            // snprintf reports the untruncated length; report what was stored.
            if (len2 < 0)
                len2 = 0;
            else if (len2 >= room)
                len2 = room - 1;
            return len + len2;

        case AliasType2:
            len = 0;

            len2 = sprintf(buf, "{");
            buf += len2;
            len += len2;

            len2 = sprintf(buf, "*");
            buf += len2;
            len += len2;

            len2 = sprintf(buf, "}");
            buf += len2;
            len += len2;
            return len;

        default:
            CError_FATAL(1543);
            return 0;
    }
}
// Writes a description of `alias` to pcfile.
//   alias - alias to print
//   len   - number of characters already on the current output line
//   flag1 - when set, print an "ALIAS = " prefix and end with a newline
//   flag2 - when clear, the `worst_case` alias is printed by name only
// Returns the updated line length (0 for the by-name worst_case form).
//
// AliasType0/AliasType1 print the link name (at most 80 characters) and a
// storage tag; AliasType1 also appends offset and size. AliasType2 prints a
// brace-enclosed list of its parents' children, AliasType0 members first,
// wrapping the line once more than 60 characters have been written.
int dumpalias(Alias *alias, int len, Boolean flag1, Boolean flag2) {
    char *name;
    char *typestr;
    AliasMember *member;
    Boolean notFirst;

    if (!flag2 && alias == worst_case) {
        fprintf(pcfile, " ALIAS = {worst_case}");
        if (flag1)
            fprintf(pcfile, "\n");
        return 0;
    }

    if (flag1) {
        if (alias == worst_case)
            fprintf(pcfile, "ALIAS worst_case = ");
        else
            fprintf(pcfile, " ALIAS = ");
    }

    switch (alias->type) {
        case AliasType0:
        case AliasType1:
            name = CMangler_GetLinkName(alias->object)->name;
            if (!strlen(name) || name[0] < 0)
                CError_FATAL(1581);

            switch (alias->object->datatype) {
                case DNONLAZYPTR:
                    typestr = "{NL}";
                    break;
                case DDATA:
                    typestr = "{RW}";
                    break;
                case DLOCAL:
                    typestr = "{SP}";
                    break;
                default:
                    typestr = "";
                    break;
            }

            // The '0' flag is not defined for %s, so only the precision is kept.
            len += fprintf(pcfile, "%.80s%s", name, typestr);
            if (alias->type == AliasType0) {
                if (flag1)
                    fprintf(pcfile, "\n");
                return len;
            }

            if (alias->offset == 0)
                len += fprintf(pcfile, ":%d", alias->size);
            else if (alias->offset > 0)
                len += fprintf(pcfile, "+%d:%d", alias->offset, alias->size);
            else
                len += fprintf(pcfile, "-%d:%d", -alias->offset, alias->size);

            if (flag1)
                fprintf(pcfile, "\n");
            return len;

        case AliasType2:
            len += fprintf(pcfile, "{");
            notFirst = 0;

            // First pass: members whose child is an AliasType0 alias.
            for (member = alias->parents; member; member = member->nextParent) {
                if (member->child->type == AliasType0) {
                    if (notFirst)
                        len += fprintf(pcfile, ",");
                    if (len > 60) {
                        fprintf(pcfile, "\n ");
                        len = 0;
                    }
                    len = dumpalias(member->child, len, 0, 0);
                    notFirst = 1;
                }
            }

            // Second pass: all remaining members.
            for (member = alias->parents; member; member = member->nextParent) {
                if (member->child->type != AliasType0) {
                    if (notFirst)
                        len += fprintf(pcfile, ",");
                    if (len > 60) {
                        fprintf(pcfile, "\n ");
                        len = 0;
                    }
                    len = dumpalias(member->child, len, 0, 0);
                    notFirst = 1;
                }
            }

            len += fprintf(pcfile, "}");
            if (flag1)
                fprintf(pcfile, "\n");
            return len;

        default:
            CError_FATAL(1661);
            return 0;
    }
}
// Stub: does nothing. The argument list has not been reconstructed.
void pcformatset() {
// unknown args
}
// Starting from the offset GetLineOffset() reports for `lineNum`, scans forward
// to the next '\n' in the NUL-terminated text and returns the offset of the
// character just before it. Returns GetLineOffset()'s negative result
// unchanged, or -1 if the terminator is reached without finding a '\n'.
int GetLineEndOffset(char *str, int lineNum, int len) {
    int start = GetLineOffset(str, lineNum, len);
    char *p;

    if (start < 0)
        return start;

    for (p = str + start; *p != '\0'; p++) {
        if (*p == '\n')
            return p - str - 1;
    }
    return -1;
}
// Scans the first `len` bytes of `str`, decrementing `lineNum` at every '\n',
// and returns the offset of the newline at which the count reaches zero or
// below. Returns -1 when `lineNum` is negative and 0 when no such newline is
// found within `len` bytes.
int GetLineOffset(char *str, int lineNum, int len) {
    int pos;

    if (lineNum < 0)
        return -1;

    for (pos = 0; pos < len; pos++) {
        if (str[pos] != '\n')
            continue;
        lineNum--;
        if (lineNum <= 0)
            return pos;
    }
    return 0;
}
// Stub: does nothing. The argument list has not been reconstructed.
void DumpSourceCode() {
// unknown args
}
// Stub: always returns 0. The argument list has not been reconstructed.
int DumpIR_SrcBreak() {
// unknown args
return 0;
}

View File

@@ -0,0 +1,345 @@
#include "compiler/PCodeUtilities.h"
#include "compiler/CError.h"
#include "compiler/CFunc.h"
#include "compiler/CParser.h"
#include "compiler/CodeGen.h"
#include "compiler/Exceptions.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/Registers.h"
#include "compiler/enode.h"
#include "compiler/objects.h"
// Turns `pc` into the record form of its instruction: sets fRecordBit and makes
// sure the operand list contains a condition-register-field operand that is
// written. Also clears fIsMove, fCommutative and fIsCSE on the instruction.
void pcsetrecordbit(PCode *pc) {
int reg;
PCodeArg *arg;
short argCount;
int argIdx;
pc->flags &= ~(fIsMove | fCommutative | fIsCSE);
// CR field written by the record form: 1 for FPR-type ops, 6 for VR-type
// ops, 0 for everything else.
if ((pc->flags & fOpTypeMask) == fOpTypeFPR) {
reg = 1;
} else if ((pc->flags & fOpTypeMask) == fOpTypeVR) {
reg = 6;
} else {
reg = 0;
}
if (pc->op == PC_ANDI || pc->op == PC_ANDIS) {
// Only the flag is set for these two opcodes; operands are left untouched.
pc->flags |= fRecordBit;
} else if (pc->op == PC_ADDI || pc->op == PC_ADDIC) {
// Rewritten as PC_ADDICR with five operands: operand 3 becomes a write
// of SPR 0 and operand 4 a write of the chosen CR field.
pc->flags |= fSetsCarry;
pc->flags |= fRecordBit;
change_num_operands(pc, 5);
pc->op = PC_ADDICR;
CError_ASSERT(76, pc->args[3].kind == PCOp_PLACEHOLDEROPERAND);
pc->args[3].kind = PCOp_REGISTER;
pc->args[3].arg = RegClass_SPR;
pc->args[3].data.reg.reg = 0;
pc->args[3].data.reg.effect = EffectWrite;
CError_ASSERT(80, pc->args[4].kind == PCOp_PLACEHOLDEROPERAND);
pc->args[4].kind = PCOp_REGISTER;
pc->args[4].arg = RegClass_CRFIELD;
pc->args[4].data.reg.reg = reg;
pc->args[4].data.reg.effect = EffectWrite;
} else {
// Scan the operands up to the first placeholder. If the CR field is
// already an operand, mark it as written and finish.
arg = pc->args;
argIdx = argCount = pc->argCount;
while (arg->kind != PCOp_PLACEHOLDEROPERAND && argIdx) {
if (arg->kind == PCOp_REGISTER && arg->arg == RegClass_CRFIELD && arg->data.reg.reg == reg) {
arg->data.reg.effect |= EffectWrite;
pc->flags |= fRecordBit;
return;
}
arg++;
argIdx--;
}
// Every counted operand was in use: take the slot just past them and
// grow the operand count by one.
if (argIdx <= 0) {
arg = &pc->args[argCount];
pc->argCount++;
}
CError_ASSERT(105, arg->kind == PCOp_PLACEHOLDEROPERAND);
arg->kind = PCOp_REGISTER;
arg->arg = RegClass_CRFIELD;
arg->data.reg.reg = reg;
arg->data.reg.effect = EffectWrite;
if (pc->op != PC_ADDICR)
pc->flags |= fRecordBit;
}
}
// Marks `pc` as having side effects: sets fSideEffects and clears fIsMove,
// fCommutative and fIsCSE.
void pcsetsideeffects(PCode *pc) {
    pc->flags = (pc->flags & ~(fIsMove | fCommutative | fIsCSE)) | fSideEffects;
}
// Converts a branch into its linking form (B -> BL, BCTR -> BCTRL,
// BLR -> BLRL), records SPR 1 as a written operand and sets fLink. If the
// resulting opcode is flagged fIsCall in opcodeinfo, the instruction is
// reclassified from branch to call.
void pcsetlinkbit(PCode *pc) {
    PCodeArg *arg;
    int remaining;

    if (pc->op == PC_B)
        pc->op = PC_BL;
    else if (pc->op == PC_BCTR)
        pc->op = PC_BCTRL;
    else if (pc->op == PC_BLR)
        pc->op = PC_BLRL;

    // If SPR 1 is already an operand, mark it written and finish.
    for (arg = pc->args, remaining = pc->argCount;
         arg->kind != PCOp_PLACEHOLDEROPERAND && remaining;
         arg++, remaining--) {
        if (arg->kind == PCOp_REGISTER && arg->arg == RegClass_SPR && arg->data.reg.reg == 1) {
            arg->data.reg.effect |= EffectWrite;
            pc->flags |= fLink;
            return;
        }
    }

    // Otherwise claim the placeholder slot the scan stopped at.
    CError_ASSERT(169, arg->kind == PCOp_PLACEHOLDEROPERAND);
    arg->kind = PCOp_REGISTER;
    arg->arg = RegClass_SPR;
    arg->data.reg.reg = 1;
    arg->data.reg.effect = EffectWrite;

    if (opcodeinfo[pc->op].flags & fIsCall) {
        pc->flags &= ~fIsBranch;
        pc->flags |= fIsCall;
    }
    pc->flags |= fLink;
}
// Attaches `label` to the current last block. If that block already contains
// code, it is first linked to `label` with pcbranch() and a new block is
// started, so the label ends up on the new block.
void branch_label(PCodeLabel *label) {
    if (pclastblock->pcodeCount != 0) {
        pcbranch(pclastblock, label);
        makepcblock();
    }

    pclabel(pclastblock, label);
}
// Emits a conditional branch to `label` and starts a new block for the
// fall-through path.
//   a         - first operand passed to the BT/BF instruction
//   compareop - comparison operator (EEQU, ENOTEQU, ELESS, ...)
//   c         - non-zero to branch when the comparison holds, zero to branch
//               when it does not
//   label     - branch target
void branch_conditional(short a, short compareop, short c, PCodeLabel *label) {
PCodeBlock *tmpblock;
PCodeLabel *tmplabel;
int r28;
// NOTE(review): tmpblock is assigned but never read afterwards.
tmpblock = pclastblock;
tmplabel = makepclabel();
// Each negated operator inverts `c` and then deliberately falls through to
// its positive counterpart, which selects the value passed as the second
// BT/BF operand (2 for equal, 0 for less, 1 for greater).
// NOTE(review): there is no default case, so r28 stays uninitialized for any
// other operator - confirm callers never pass one.
switch (compareop) {
case ENOTEQU:
c = !c;
case EEQU:
r28 = 2;
break;
case EGREATEREQU:
c = !c;
case ELESS:
r28 = 0;
break;
case ELESSEQU:
c = !c;
case EGREATER:
r28 = 1;
break;
}
emitpcode(c ? PC_BT : PC_BF, a, r28, label);
// Link the block to both the branch target and the fall-through label.
pcbranch(pclastblock, label);
pcbranch(pclastblock, tmplabel);
makepcblock();
pclabel(pclastblock, tmplabel);
}
// Emits an unconditional branch (PC_B) to `label`, links the current block to
// the label with pcbranch() and starts a new block.
void branch_always(PCodeLabel *label) {
emitpcode(PC_B, label);
pcbranch(pclastblock, label);
makepcblock();
}
// Emits the branch instruction `opcode` targeting `label`, then starts a new
// block for the fall-through path. The current block is linked to both the
// target label and a freshly created fall-through label.
void branch_decrement_always(Opcode opcode, PCodeLabel *label) {
    PCodeLabel *fallthroughLabel;

    fallthroughLabel = makepclabel();
    emitpcode(opcode, label);

    pcbranch(pclastblock, label);
    pcbranch(pclastblock, fallthroughLabel);

    makepcblock();
    pclabel(pclastblock, fallthroughLabel);
}
// Emits a PC_BCTR instruction with `obj` and 0 as its operands, then starts a
// new block.
void branch_indirect(Object *obj) {
emitpcode(PC_BCTR, obj, 0);
makepcblock();
}
// Returns the total number of scratch registers across all register classes,
// i.e. the number of operands branch_record_volatiles() will fill in.
int branch_count_volatiles(void) {
    int total = 0;
    RegClass rclass;

    for (rclass = 0; rclass < RegClassMax; rclass++) {
        if (n_scratch_registers[rclass] > 0)
            total += n_scratch_registers[rclass];
    }

    return total;
}
// Fills `arglist` with one register operand per scratch register, walking the
// register classes from RegClassMax - 1 down to 0.
//   arglist - first operand slot to fill
//   masks   - per-class bit masks of registers that the call reads
// Every scratch register is marked as written; those whose bit is set in
// masks[class] are additionally marked as read.
// Returns the slot following the last one written.
PCodeArg *branch_record_volatiles(PCodeArg *arglist, UInt32 *masks) {
    int i;
    // A plain int counter: the countdown relies on the index going negative,
    // which would never happen if RegClass were an unsigned type.
    int rclass;

    for (rclass = RegClassMax - 1; rclass >= 0; rclass--) {
        for (i = 0; i < n_scratch_registers[rclass]; i++) {
            arglist->kind = PCOp_REGISTER;
            arglist->arg = rclass;
            arglist->data.reg.reg = scratch_registers[rclass][i];
            arglist->data.reg.effect = EffectWrite;
            // Unsigned shift so that register 31 does not overflow a signed int.
            if (masks[rclass] & ((UInt32) 1 << scratch_registers[rclass][i]))
                arglist->data.reg.effect |= EffectRead;
            arglist++;
        }
    }

    return arglist;
}
// Emits a direct call (PC_BL) to `obj`.
//   obj     - function being called
//   add_nop - when non-zero, a PC_NOP is emitted right after the call
//   masks   - per-class bit masks of registers the call reads
// The call instruction carries one operand per scratch register and, when
// exceptions are enabled and a statement is current, extra operands for the
// registers used by pending exception actions. A new labelled block is started
// after the call.
void branch_subroutine(Object *obj, short add_nop, UInt32 *masks) {
int count;
PCode *pc;
PCodeArg *arg;
// Work out how many operands the call instruction needs.
count = branch_count_volatiles();
if (copts.exceptions && current_statement)
count += countexceptionactionregisters(current_statement->dobjstack);
pc = makepcode(PC_BL, count, obj, 0);
// Scratch-register operands are filled in starting at args[1]; exception
// action registers follow them.
arg = branch_record_volatiles(pc->args + 1, masks);
if (copts.exceptions && current_statement)
noteexceptionactionregisters(current_statement->dobjstack, arg);
appendpcode(pclastblock, pc);
if (add_nop)
emitpcode(PC_NOP);
branch_label(makepclabel());
if (copts.exceptions && current_statement)
recordexceptionactions(pc, current_statement->dobjstack);
}
// Emits an indirect call through the count register (PC_BCTRL).
//   masks - per-class bit masks of registers the call reads
// Operand bookkeeping mirrors branch_subroutine(): one operand per scratch
// register plus, when exceptions are enabled and a statement is current, the
// registers used by pending exception actions. A new labelled block is started
// after the call.
void branch_subroutine_ctr(UInt32 *masks) {
int count;
PCode *pc;
PCodeArg *arg;
count = branch_count_volatiles();
if (copts.exceptions && current_statement)
count += countexceptionactionregisters(current_statement->dobjstack);
pc = makepcode(PC_BCTRL, count);
// Scratch-register operands are filled in starting at args[1].
arg = branch_record_volatiles(pc->args + 1, masks);
if (copts.exceptions && current_statement)
noteexceptionactionregisters(current_statement->dobjstack, arg);
appendpcode(pclastblock, pc);
branch_label(makepclabel());
if (copts.exceptions && current_statement)
recordexceptionactions(pc, current_statement->dobjstack);
}
// Emits code computing dest_reg = base_reg + obj + offset.
// When both an object and a non-zero offset are given and the object is not a
// local (DLOCAL), the object is first added into a fresh virtual register and
// only the offset is added afterwards. If nothing remains to be added, a plain
// register move is emitted instead of an ADDI.
void add_immediate(short dest_reg, short base_reg, Object *obj, SInt16 offset) {
    short src_reg;

    src_reg = base_reg;
    if (obj != NULL && offset != 0 && obj->datatype != DLOCAL) {
        src_reg = used_virtual_registers[RegClass_GPR]++;
        add_immediate_lo(src_reg, base_reg, obj, 0, 1);
        obj = NULL;
    }

    if (obj != NULL || offset != 0) {
        emitpcode(PC_ADDI, dest_reg, src_reg, obj, offset);
        return;
    }

    emitpcode(PC_MR, dest_reg, src_reg);
}
// Builds a PC_ADDI instruction with operands dest_reg, base_reg, obj and
// offset. `obj` must be non-NULL. The instruction is appended to the current
// last block only when `add_to_block` is non-zero; it is returned either way.
PCode *add_immediate_lo(short dest_reg, short base_reg, Object *obj, SInt16 offset, char add_to_block) {
    PCode *instr;

    CError_ASSERT(577, obj);

    instr = makepcode(PC_ADDI, dest_reg, base_reg, obj, offset);
    if (add_to_block) {
        appendpcode(pclastblock, instr);
    }

    return instr;
}
// Builds the instruction that sets dest_reg from `obj`/`offset` using ADDIS or LIS:
//   - local objects (DLOCAL): ADDIS relative to base_reg
//   - position-independent code: ADDIS relative to base_reg, which must be non-zero
//   - otherwise: LIS, and base_reg must be 0
// The instruction is appended to the current last block only when
// `add_to_block` is non-zero; it is returned either way.
PCode *op_absolute_ha(short dest_reg, short base_reg, Object *obj, short offset, char add_to_block) {
    PCode *instr;
    int tmp_reg;

    if (obj->datatype != DLOCAL && !copts.codegen_pic) {
        CError_ASSERT(606, base_reg == 0);
        instr = makepcode(PC_LIS, dest_reg, obj, offset);
    } else if (obj->datatype == DLOCAL) {
        instr = makepcode(PC_ADDIS, dest_reg, base_reg, obj, offset);
    } else {
        tmp_reg = base_reg;
        CError_ASSERT(601, tmp_reg);
        instr = makepcode(PC_ADDIS, dest_reg, tmp_reg, obj, offset);
    }

    if (add_to_block) {
        appendpcode(pclastblock, instr);
    }
    return instr;
}
// Emits a load or store of `dest_reg` at address base_reg + obj + offset,
// inserting whatever extra instructions are needed to make the address
// encodable.
//   opcode   - load/store opcode to emit
//   dest_reg - register loaded into or stored from
//   base_reg - base address register
//   obj      - optional object contributing to the address (may be NULL)
//   offset   - byte displacement
void load_store_register(Opcode opcode, short dest_reg, short base_reg, Object *obj, SInt32 offset) {
short addi_tmp;
short offset_reg1;
short offset_reg2;
offset_reg1 = base_reg;
// A non-local object combined with a non-zero offset: add the object into a
// fresh virtual register first, leaving only the numeric offset.
if (obj && offset && obj->datatype != DLOCAL) {
offset_reg1 = used_virtual_registers[RegClass_GPR]++;
add_immediate_lo(offset_reg1, base_reg, obj, 0, 1);
obj = NULL;
}
// Offset does not fit in 16 signed bits: add the upper half with ADDIS and
// keep the low half as the displacement. The upper half is incremented when
// bit 15 of the offset is set, to compensate for the low half being treated
// as a signed value.
if (offset != (short)offset) {
// For an LWZ into register 12 or 11 the destination doubles as the
// temporary; otherwise a new virtual register is used.
if (opcode == PC_LWZ && dest_reg == 12)
offset_reg2 = 12;
else if (opcode == PC_LWZ && dest_reg == 11)
offset_reg2 = 11;
else
offset_reg2 = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_ADDIS, offset_reg2, offset_reg1, 0, (short) ((offset >> 16) + ((offset & 0x8000) >> 15)));
offset = (short) offset;
offset_reg1 = offset_reg2;
}
if (opcode == PC_STVX || opcode == PC_LVX) {
// These two opcodes are emitted with two register operands instead of a
// displacement, so the address is materialised in registers.
offset_reg2 = 0;
if (obj) {
addi_tmp = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_ADDI, addi_tmp, offset_reg1, obj, offset);
offset_reg1 = addi_tmp;
} else if (offset) {
offset_reg2 = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_LI, offset_reg2, offset);
}
if (!offset_reg2)
emitpcode(opcode, dest_reg, 0, offset_reg1);
else
emitpcode(opcode, dest_reg, offset_reg1, offset_reg2);
} else {
emitpcode(opcode, dest_reg, offset_reg1, obj, offset);
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,792 @@
#include "compiler/StructMoves.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/CodeGen.h"
#include "compiler/Operands.h"
#include "compiler/PCode.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/Registers.h"
// Ensures `opnd` is a register + immediate-offset memory operand whose offset
// stays at or below 0x7FFF after `offset` more bytes are added. If it is not,
// the operand's address is loaded into a fresh virtual register and `opnd` is
// rewritten to address memory through that register with offset 0.
// `unusedArg` is ignored.
void make_addressable(Operand *opnd, SInt32 offset, int unusedArg) {
    int tmp;

    if (opnd->optype == OpndType_IndirectSymbol)
        coerce_to_addressable(opnd);

    // Already usable as-is.
    if (opnd->optype == OpndType_IndirectGPR_ImmOffset && (opnd->immOffset + offset) <= 0x7FFF)
        return;

    tmp = used_virtual_registers[RegClass_GPR]++;
    load_address(tmp, opnd);

    opnd->optype = OpndType_IndirectGPR_ImmOffset;
    opnd->reg = tmp;
    opnd->object = NULL;
    opnd->immOffset = 0;
}
// Computes the address of `opnd` plus `offset` bytes into a new virtual
// register and rewrites `opnd` to address memory through that register with
// offset 0. Raises fatal internal error 80 for operand kinds other than
// register + immediate offset and register + register.
static void load_displaced_address(Operand *opnd, SInt32 offset) {
int reg;
// The result register is allocated before the operand is coerced.
reg = used_virtual_registers[RegClass_GPR]++;
if (opnd->optype == OpndType_IndirectSymbol)
coerce_to_addressable(opnd);
if (opnd->optype == OpndType_IndirectGPR_ImmOffset) {
offset += opnd->immOffset;
if (!FITS_IN_SHORT(offset)) {
// Combined displacement is too large for one immediate: add the
// operand's own offset first, then the requested displacement
// (offset - opnd->immOffset is the value originally passed in).
add_immediate(reg, opnd->reg, opnd->object, opnd->immOffset);
emitpcode(PC_ADDI, reg, reg, 0, offset - opnd->immOffset);
} else {
add_immediate(reg, opnd->reg, opnd->object, offset);
}
} else if (opnd->optype == OpndType_IndirectGPR_Indexed) {
// base + index, then the displacement
emitpcode(PC_ADD, reg, opnd->reg, opnd->regOffset);
emitpcode(PC_ADDI, reg, reg, 0, offset);
} else {
CError_FATAL(80);
}
opnd->optype = OpndType_IndirectGPR_ImmOffset;
opnd->reg = reg;
opnd->object = NULL;
opnd->immOffset = 0;
}
// Copies `len` bytes from `src` to `dst` with individual load/store pairs.
//   dst, src - memory operands (symbol, register + offset, or register + register)
//   len      - number of bytes to copy
//   align    - alignment of the data in bytes
// An 8-byte copy is done with a single LFD/STFD pair through a floating-point
// register. Anything else is copied in equal steps of 1, 2 or 4 bytes.
static void move_block_via_load_store(Operand *dst, Operand *src, SInt32 len, SInt32 align) {
SInt32 step;
SInt32 pos;
int floatReg;
int reg;
if (src->optype == OpndType_IndirectSymbol)
coerce_to_addressable(src);
if (dst->optype == OpndType_IndirectSymbol)
coerce_to_addressable(dst);
if (len == 8) {
floatReg = used_virtual_registers[RegClass_FPR]++;
if (src->optype == OpndType_IndirectGPR_ImmOffset) {
load_store_register(PC_LFD, floatReg, src->reg, src->object, src->immOffset);
setpcodeflags(src->flags);
} else if (src->optype == OpndType_IndirectGPR_Indexed) {
emitpcode(PC_LFDX, floatReg, src->reg, src->regOffset);
setpcodeflags(src->flags);
} else {
CError_FATAL(145);
}
if (dst->optype == OpndType_IndirectGPR_ImmOffset) {
load_store_register(PC_STFD, floatReg, dst->reg, dst->object, dst->immOffset);
setpcodeflags(dst->flags);
} else if (dst->optype == OpndType_IndirectGPR_Indexed) {
emitpcode(PC_STFDX, floatReg, dst->reg, dst->regOffset);
setpcodeflags(dst->flags);
} else {
CError_FATAL(157);
}
return;
}
// Pick the access width. When misaligned accesses are disabled and the
// alignment is below 4, the width comes from the alignment (0 is treated as
// 1, and it is capped at len); otherwise it comes from the length alone.
if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) {
SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align;
step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2;
} else {
step = ((UInt32) len > 4) ? 4 : ((UInt32) len <= 2) ? len : 2;
}
// More than one access is needed: register + register operands are converted
// to register + offset form so each access can use its own displacement.
if (step != len) {
if (dst->optype == OpndType_IndirectGPR_Indexed)
make_addressable(dst, len, 0);
if (src->optype == OpndType_IndirectGPR_Indexed)
make_addressable(src, len, 0);
}
// NOTE(review): the loop ends only when len reaches exactly 0, so it assumes
// len is a multiple of step - confirm callers guarantee this.
for (pos = 0; len != 0; len -= step, pos += step) {
reg = used_virtual_registers[RegClass_GPR]++;
if (src->optype == OpndType_IndirectGPR_ImmOffset) {
load_store_register(
(step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ,
reg,
src->reg,
src->object,
src->immOffset + pos
);
setpcodeflags(src->flags);
} else if (src->optype == OpndType_IndirectGPR_Indexed) {
emitpcode(
(step == 1) ? PC_LBZX : (step == 2) ? PC_LHZX : PC_LWZX,
reg,
src->reg,
src->regOffset
);
setpcodeflags(src->flags);
} else {
CError_FATAL(183);
}
if (dst->optype == OpndType_IndirectGPR_ImmOffset) {
load_store_register(
(step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW,
reg,
dst->reg,
dst->object,
dst->immOffset + pos
);
setpcodeflags(dst->flags);
} else if (dst->optype == OpndType_IndirectGPR_Indexed) {
emitpcode(
(step == 1) ? PC_STBX : (step == 2) ? PC_STHX : PC_STWX,
reg,
dst->reg,
dst->regOffset
);
setpcodeflags(dst->flags);
} else {
CError_FATAL(195);
}
}
}
// Copies `len` bytes from `src` to `dst` as an unrolled sequence of loads and
// stores, using the widest accesses available for the given alignment.
//   dst, src - memory operands; both are first converted to register + offset form
//   len      - number of bytes to copy
//   align    - alignment of the data in bytes
// Phases: 16-byte chunks through two FPRs (8-byte aligned data only), then
// 8-byte chunks, then the remaining tail.
static void move_block_via_load_store_sequence(Operand *dst, Operand *src, SInt32 len, SInt32 align) {
SInt32 pos;
int i;
SInt32 step;
pos = 0;
make_addressable(dst, len, 0);
make_addressable(src, len, 0);
// Phase 1: with 8-byte alignment, move 16 bytes per iteration using two
// LFD/STFD pairs (both loads are emitted before the stores).
if ((align % 8) == 0) {
while (len >= 16) {
int reg1 = used_virtual_registers[RegClass_FPR]++;
int reg2 = used_virtual_registers[RegClass_FPR]++;
load_store_register(PC_LFD, reg1, src->reg, src->object, src->immOffset + pos);
setpcodeflags(src->flags);
load_store_register(PC_LFD, reg2, src->reg, src->object, src->immOffset + pos + 8);
setpcodeflags(src->flags);
load_store_register(PC_STFD, reg1, dst->reg, dst->object, dst->immOffset + pos);
setpcodeflags(dst->flags);
load_store_register(PC_STFD, reg2, dst->reg, dst->object, dst->immOffset + pos + 8);
setpcodeflags(dst->flags);
pos += 16;
len -= 16;
}
}
// Phase 2: 8 bytes at a time - one FPR when 8-byte aligned, otherwise pairs
// of GPR loads followed by pairs of stores.
while (len >= 8) {
if ((align % 8) == 0) {
int reg = used_virtual_registers[RegClass_FPR]++;
load_store_register(PC_LFD, reg, src->reg, src->object, src->immOffset + pos);
setpcodeflags(src->flags);
load_store_register(PC_STFD, reg, dst->reg, dst->object, dst->immOffset + pos);
setpcodeflags(dst->flags);
pos += 8;
len -= 8;
} else {
// Access width for this chunk: 4 bytes unless misaligned accesses are
// disabled and the alignment is below 4.
if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) {
SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align;
step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp > 2) ? 2 : 1;
} else {
step = 4;
}
// Each iteration moves two elements of `step` bytes.
for (i = 0; i < 8; i += (step * 2)) {
int reg1 = used_virtual_registers[RegClass_GPR]++;
int reg2 = used_virtual_registers[RegClass_GPR]++;
load_store_register(
(step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ,
reg1,
src->reg,
src->object,
src->immOffset + pos
);
setpcodeflags(src->flags);
load_store_register(
(step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ,
reg2,
src->reg,
src->object,
src->immOffset + pos + step
);
setpcodeflags(src->flags);
load_store_register(
(step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW,
reg1,
dst->reg,
dst->object,
dst->immOffset + pos
);
setpcodeflags(dst->flags);
load_store_register(
(step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW,
reg2,
dst->reg,
dst->object,
dst->immOffset + pos + step
);
setpcodeflags(dst->flags);
pos += (step * 2);
len -= (step * 2);
}
}
}
// Phase 3: remaining tail, one element per iteration. The width is
// recomputed every time because it depends on the bytes left.
while (len) {
int reg;
if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) {
SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align;
step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2;
} else {
step = ((UInt32) len > 4) ? 4 : ((UInt32) len <= 2) ? len : 2;
}
reg = used_virtual_registers[RegClass_GPR]++;
load_store_register(
(step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ,
reg,
src->reg,
src->object,
src->immOffset + pos
);
setpcodeflags(src->flags);
load_store_register(
(step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW,
reg,
dst->reg,
dst->object,
dst->immOffset + pos
);
setpcodeflags(dst->flags);
len -= step;
pos += step;
}
}
/*
 * Copies `len` bytes from `src` to `dst` with a counted loop (MTCTR/BDNZ)
 * followed by a straight-line tail.
 *
 * The element size `step` is 4 unless misaligned accesses are disabled and
 * `align` is below 4, in which case it is derived from `align` (1 or 2).
 * Both operands are rebased by -step via load_displaced_address() so that the
 * update-form load/store inside the loop can advance the base registers.
 * Each loop iteration moves two elements, i.e. step * 2 bytes, and the
 * iteration count len / (step * 2) is loaded into CTR.
 *
 * Fix: the tail length used to be `len & 7`, which equals the bytes left
 * over by the loop only when step == 4. With step 1 or 2 the loop leaves
 * len % (step * 2) bytes, so the old tail could copy past the end of the
 * block. The tail is now computed from the real per-iteration size; for
 * step == 4 (and non-negative `len`) the value is unchanged.
 */
static void move_block_via_inline_loop(Operand *dst, Operand *src, SInt32 len, SInt32 align) {
    PCodeLabel *label; // r25
    SInt32 pos; // r25
    SInt32 step; // r24
    int reg1; // r22
    int reg2; // r23
    SInt32 remainder; // r23

    label = makepclabel();

    if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) {
        SInt32 tmp = (align == 0) ? 1 : (align > len) ? len : align;
        step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2;
    } else {
        step = 4;
    }

    // Bias both bases by -step; the loop body addresses elements at +step and +step*2
    load_displaced_address(dst, -step);
    load_displaced_address(src, -step);

    CError_ASSERT(377, (len / step) != 0);

    // CTR = number of two-element iterations
    reg1 = used_virtual_registers[RegClass_GPR]++;
    load_immediate(reg1, len / (step * 2));
    emitpcode(PC_MTCTR, reg1);

    branch_label(label);

    reg1 = used_virtual_registers[RegClass_GPR]++;
    reg2 = used_virtual_registers[RegClass_GPR]++;

    // First element: plain load; second element: update form, advancing src->reg
    load_store_register(
        (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ,
        reg1,
        src->reg,
        NULL,
        step
    );
    setpcodeflags(src->flags);
    load_store_register(
        (step == 1) ? PC_LBZU : (step == 2) ? PC_LHZU : PC_LWZU,
        reg2,
        src->reg,
        NULL,
        step * 2
    );
    setpcodeflags(src->flags);

    // Matching stores; the update form advances dst->reg
    load_store_register(
        (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW,
        reg1,
        dst->reg,
        NULL,
        step
    );
    setpcodeflags(dst->flags);
    load_store_register(
        (step == 1) ? PC_STBU : (step == 2) ? PC_STHU : PC_STWU,
        reg2,
        dst->reg,
        NULL,
        step * 2
    );
    setpcodeflags(dst->flags);

    branch_decrement_always(PC_BDNZ, label);

    // Tail: whatever the loop did not cover. The first tail element sits at
    // +step because of the bias applied above. `step` is re-derived per element
    // inside the loop, so the initialiser deliberately uses the loop's step.
    for (remainder = len % (step * 2), pos = step; remainder != 0; remainder -= step, pos += step) {
        int reg;

        if (copts.misaligned_mem_access == 0 && (UInt32) align < 4) {
            SInt32 tmp = (align == 0) ? 1 : (align > remainder) ? remainder : align;
            step = ((UInt32) tmp > 4) ? 4 : ((UInt32) tmp <= 2) ? (UInt32) tmp : 2;
        } else {
            step = ((UInt32) remainder > 4) ? 4 : ((UInt32) remainder <= 2) ? remainder : 2;
        }

        reg = used_virtual_registers[RegClass_GPR]++;
        load_store_register(
            (step == 1) ? PC_LBZ : (step == 2) ? PC_LHZ : PC_LWZ,
            reg,
            src->reg,
            NULL,
            pos
        );
        setpcodeflags(src->flags);
        load_store_register(
            (step == 1) ? PC_STB : (step == 2) ? PC_STH : PC_STW,
            reg,
            dst->reg,
            NULL,
            pos
        );
        setpcodeflags(dst->flags);
    }
}
/*
 * Entry point for block copies: picks one of three copy strategies from the
 * block length and alignment. The destination operand is copied into a local
 * first, so the strategies work on that copy rather than on the caller's `dst`.
 */
void move_block(Operand *dst, Operand *src, SInt32 len, SInt32 align) {
    Operand dstCopy = *dst;

    CError_ASSERT(447, dstCopy.optype >= OpndType_IndirectGPR_ImmOffset);
    CError_ASSERT(449, src->optype >= OpndType_IndirectGPR_ImmOffset);

    // Sizes that are handed to the single load/store path
    if (len == 1 || len == 2 || len == 4 || (len == 8 && align == 8)) {
        move_block_via_load_store(&dstCopy, src, len, align);
        return;
    }

    // Short blocks (or medium ones when not optimising for size) are unrolled
    if (len <= 16 || (copts.optimizesize == 0 && len <= 64)) {
        move_block_via_load_store_sequence(&dstCopy, src, len, align);
        return;
    }

    move_block_via_inline_loop(&dstCopy, src, len, align);
}
/*
 * Loads a block of `len` bytes (1 to 4) located at `srcReg` + `offset`
 * (relative to opnd->object) into `dstReg`, building the value from byte,
 * halfword or word loads depending on `align`.
 *
 * Pieces are loaded into a scratch register and merged into `dstReg` with
 * RLWINM (first piece) and RLWIMI (subsequent pieces). The rotate/mask
 * immediates suggest the bytes end up left-justified, first byte in the most
 * significant position - TODO confirm against emitpcode's operand convention.
 * Every emitted instruction is tagged with opnd->flags. Any other `len`
 * emits nothing (the switch has no default).
 */
static void load_word_of_small_struct(short dstReg, short srcReg, Operand *opnd, SInt32 offset, SInt32 len, SInt32 align) {
short tmpReg;
// byte offset of the third byte once the first two have been loaded (len == 3 only)
short extra = 0;
switch (len) {
case 1:
tmpReg = used_virtual_registers[RegClass_GPR]++;
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, dstReg, tmpReg, 24, 0, 7);
setpcodeflags(opnd->flags);
break;
case 2:
case 3:
if (align > 1) {
// first two bytes with one halfword load
tmpReg = used_virtual_registers[RegClass_GPR]++;
load_store_register(PC_LHZ, tmpReg, srcReg, opnd->object, offset);
extra += 2;
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, dstReg, tmpReg, 16, 0, 15);
setpcodeflags(opnd->flags);
} else {
// first two bytes with two byte loads, reusing the same scratch register
tmpReg = used_virtual_registers[RegClass_GPR]++;
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, dstReg, tmpReg, 24, 0, 7);
setpcodeflags(opnd->flags);
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 1);
extra += 2;
setpcodeflags(opnd->flags);
emitpcode(PC_RLWIMI, dstReg, tmpReg, 16, 8, 15);
setpcodeflags(opnd->flags);
}
if (len == 3) {
// third byte is always loaded on its own
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + extra);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWIMI, dstReg, tmpReg, 8, 16, 23);
setpcodeflags(opnd->flags);
}
break;
case 4:
if (align > 2) {
// a single word load straight into dstReg
load_store_register(PC_LWZ, dstReg, srcReg, opnd->object, offset);
setpcodeflags(opnd->flags);
} else if (align > 1) {
// two halfword loads
tmpReg = used_virtual_registers[RegClass_GPR]++;
load_store_register(PC_LHZ, tmpReg, srcReg, opnd->object, offset);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, dstReg, tmpReg, 16, 0, 15);
setpcodeflags(opnd->flags);
load_store_register(PC_LHZ, tmpReg, srcReg, opnd->object, offset + 2);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWIMI, dstReg, tmpReg, 0, 16, 31);
setpcodeflags(opnd->flags);
} else {
// four byte loads
tmpReg = used_virtual_registers[RegClass_GPR]++;
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, dstReg, tmpReg, 24, 0, 7);
setpcodeflags(opnd->flags);
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 1);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWIMI, dstReg, tmpReg, 16, 8, 15);
setpcodeflags(opnd->flags);
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 2);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWIMI, dstReg, tmpReg, 8, 16, 23);
setpcodeflags(opnd->flags);
load_store_register(PC_LBZ, tmpReg, srcReg, opnd->object, offset + 3);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWIMI, dstReg, tmpReg, 0, 24, 31);
setpcodeflags(opnd->flags);
}
break;
}
}
/*
 * Brings a small block described by `srcOpnd` into a single GPR and rewrites
 * `srcOpnd` to refer to that register.
 *
 * `dstReg` is the requested target register; 0 means "allocate a fresh
 * virtual GPR". Operands that are already in a GPR or GPR pair are left
 * untouched. When misaligned accesses are enabled the alignment is treated
 * as 4.
 */
void load_small_block_into_reg(short dstReg, Operand *srcOpnd, Type *type, SInt32 align) {
    short finalReg;
    short tmpReg;
    SInt32 absAddress;

    coerce_to_addressable(srcOpnd);

    // Indexed addressing is rejected up front
    if (srcOpnd->optype == OpndType_IndirectGPR_Indexed) {
        CError_FATAL(557);
        tmpReg = used_virtual_registers[RegClass_GPR]++;
        load_address(tmpReg, srcOpnd);
        srcOpnd->optype = OpndType_IndirectGPR_ImmOffset;
        srcOpnd->reg = tmpReg;
        srcOpnd->object = NULL;
        srcOpnd->immOffset = 0;
    }

    if (copts.misaligned_mem_access)
        align = 4;

    // Already register-resident: nothing to emit and the operand stays as it is
    if (srcOpnd->optype == OpndType_GPRPair || srcOpnd->optype == OpndType_GPR)
        return;

    switch (srcOpnd->optype) {
        case OpndType_GPR_ImmOffset:
            if (dstReg)
                finalReg = dstReg;
            else
                finalReg = used_virtual_registers[RegClass_GPR]++;
            add_immediate(finalReg, srcOpnd->reg, srcOpnd->object, srcOpnd->immOffset);
            break;

        case OpndType_GPR_Indexed:
            if (dstReg)
                finalReg = dstReg;
            else
                finalReg = used_virtual_registers[RegClass_GPR]++;
            emitpcode(PC_ADD, finalReg, srcOpnd->reg, srcOpnd->regOffset);
            break;

        case OpndType_Absolute:
            if (dstReg)
                finalReg = dstReg;
            else
                finalReg = used_virtual_registers[RegClass_GPR]++;
            absAddress = srcOpnd->immediate;
            if (FITS_IN_SHORT(absAddress)) {
                emitpcode(PC_LI, finalReg, absAddress);
            } else {
                // At higher optimisation levels the high half goes through its own register
                tmpReg = (copts.optimizationlevel > 1 && absAddress)
                    ? used_virtual_registers[RegClass_GPR]++
                    : finalReg;
                emitpcode(PC_LIS, tmpReg, 0, HIGH_PART(absAddress));
                if (absAddress)
                    emitpcode(PC_ADDI, finalReg, tmpReg, 0, LOW_PART(absAddress));
            }
            break;

        case OpndType_IndirectGPR_ImmOffset:
            if (dstReg)
                finalReg = dstReg;
            else
                finalReg = used_virtual_registers[RegClass_GPR]++;
            load_word_of_small_struct(finalReg, srcOpnd->reg, srcOpnd, srcOpnd->immOffset, type->size, align);
            break;

        default:
            CError_FATAL(606);
    }

    srcOpnd->optype = OpndType_GPR;
    srcOpnd->reg = finalReg;
}
/*
 * Brings a small block described by `srcOpnd` into a pair of GPRs and rewrites
 * `srcOpnd` to an OpndType_GPRPair operand.
 *
 * `dstRegLo` / `dstRegHi` are the requested target registers; 0 means "no
 * preference", in which case a fresh virtual GPR is allocated where needed.
 * Single-register operand kinds (GPR, GPR_ImmOffset, GPR_Indexed) and indexed
 * addressing are rejected with CError_FATAL. When misaligned accesses are
 * enabled the alignment is treated as 4.
 */
void load_small_block_into_reg_pair(short dstRegLo, short dstRegHi, Operand *srcOpnd, Type *type, SInt32 align) {
short finalRegLo;
short finalRegHi;
short tmpRegLo;
short tmpRegHi;
short tmpReg;
SInt32 absAddress;
// -1 marks "no high register chosen"; checked after the switch
finalRegHi = -1;
coerce_to_addressable(srcOpnd);
if (srcOpnd->optype == OpndType_IndirectGPR_Indexed) {
CError_FATAL(624);
tmpReg = used_virtual_registers[RegClass_GPR]++;
load_address(tmpReg, srcOpnd);
srcOpnd->optype = OpndType_IndirectGPR_ImmOffset;
srcOpnd->reg = tmpReg;
srcOpnd->object = NULL;
srcOpnd->immOffset = 0;
}
if (copts.misaligned_mem_access)
align = 4;
switch (srcOpnd->optype) {
case OpndType_GPRPair:
// If only one half of the destination was specified, allocate the other half
if (dstRegLo != 0 && dstRegHi == 0)
dstRegHi = used_virtual_registers[RegClass_GPR]++;
if (dstRegHi != 0 && dstRegLo == 0)
dstRegLo = used_virtual_registers[RegClass_GPR]++;
if (srcOpnd->reg != dstRegLo || srcOpnd->regHi != dstRegHi) {
tmpRegLo = dstRegLo ? dstRegLo : srcOpnd->reg;
tmpRegHi = dstRegHi ? dstRegHi : srcOpnd->regHi;
// The two moves are ordered so that a source half is never overwritten
// before it has been copied
if (tmpRegLo != srcOpnd->reg) {
if (tmpRegLo == srcOpnd->regHi) {
// low target is the high source: move the high half out first
CError_ASSERT(657, tmpRegLo != tmpRegHi);
emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi);
emitpcode(PC_MR, tmpRegLo, srcOpnd->reg);
} else {
emitpcode(PC_MR, tmpRegLo, srcOpnd->reg);
if (srcOpnd->regHi != tmpRegHi)
emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi);
}
} else if (tmpRegHi != srcOpnd->regHi) {
if (tmpRegHi == srcOpnd->reg) {
// high target is the low source: move the low half out first
CError_ASSERT(671, tmpRegLo != tmpRegHi);
emitpcode(PC_MR, tmpRegLo, srcOpnd->reg);
emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi);
} else {
emitpcode(PC_MR, tmpRegHi, srcOpnd->regHi);
if (srcOpnd->reg != tmpRegLo)
emitpcode(PC_MR, tmpRegLo, srcOpnd->reg);
}
}
}
// NOTE(review): the result is reported as the *source* registers even when
// moves into tmpRegLo/tmpRegHi were just emitted - confirm this is intended.
finalRegLo = srcOpnd->reg;
finalRegHi = srcOpnd->regHi;
break;
case OpndType_GPR:
CError_FATAL(688);
break;
case OpndType_GPR_ImmOffset:
CError_FATAL(691);
break;
case OpndType_GPR_Indexed:
CError_FATAL(694);
break;
case OpndType_Absolute:
// Low register receives the immediate itself
finalRegLo = dstRegLo ? dstRegLo : used_virtual_registers[RegClass_GPR]++;
absAddress = srcOpnd->immediate;
if (FITS_IN_SHORT(absAddress)) {
emitpcode(PC_LI, finalRegLo, absAddress);
} else {
tmpReg = finalRegLo;
if (copts.optimizationlevel > 1 && absAddress)
tmpReg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_LIS, tmpReg, 0, HIGH_PART(absAddress));
if (absAddress)
emitpcode(PC_ADDI, finalRegLo, tmpReg, 0, LOW_PART(absAddress));
}
// High register is 0 for unsigned types or non-negative values, otherwise -1
finalRegHi = dstRegHi ? dstRegHi : used_virtual_registers[RegClass_GPR]++;
if (is_unsigned(type) || absAddress >= 0)
load_immediate(finalRegHi, 0);
else
load_immediate(finalRegHi, -1);
break;
case OpndType_IndirectGPR_ImmOffset:
finalRegLo = dstRegLo ? dstRegLo : used_virtual_registers[RegClass_GPR]++;
finalRegHi = dstRegHi ? dstRegHi : used_virtual_registers[RegClass_GPR]++;
// The half whose target register is also the base register is loaded last,
// so the base is still intact for the other load
if (srcOpnd->reg == finalRegHi) {
if (srcOpnd->reg == finalRegLo) {
CError_FATAL(726);
} else {
load_word_of_small_struct(
finalRegLo, srcOpnd->reg, srcOpnd,
srcOpnd->immOffset + low_offset, type->size - 4, align);
load_word_of_small_struct(
finalRegHi, srcOpnd->reg, srcOpnd,
srcOpnd->immOffset + high_offset, 4, align);
}
} else {
load_word_of_small_struct(
finalRegHi, srcOpnd->reg, srcOpnd,
srcOpnd->immOffset + high_offset, 4, align);
load_word_of_small_struct(
finalRegLo, srcOpnd->reg, srcOpnd,
srcOpnd->immOffset + low_offset, type->size - 4, align);
}
break;
default:
CError_FATAL(737);
}
if (finalRegHi == -1) {
CError_FATAL(741);
} else {
srcOpnd->optype = OpndType_GPRPair;
srcOpnd->reg = finalRegLo;
srcOpnd->regHi = finalRegHi;
}
}
/*
 * Stores a block of `len` bytes (1 to 4) held in `srcReg` to memory at
 * `dstReg` + `offset` (relative to opnd->object), using byte, halfword or
 * word stores depending on `align`.
 *
 * Each piece is extracted from `srcReg` into a scratch register with RLWINM
 * before being stored; this is the counterpart of load_word_of_small_struct().
 * Every emitted instruction is tagged with opnd->flags. Any other `len`
 * emits nothing (the switch has no default).
 */
static void store_word_of_small_struct(short srcReg, short dstReg, Operand *opnd, SInt32 offset, SInt32 len, SInt32 align) {
short tmpReg;
// byte offset of the third byte once the first two have been stored (len == 3 only)
short extra = 0;
switch (len) {
case 1:
tmpReg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_RLWINM, tmpReg, srcReg, 8, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset);
setpcodeflags(opnd->flags);
break;
case 2:
case 3:
if (align > 1) {
// first two bytes with one halfword store
tmpReg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 16, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STH, tmpReg, dstReg, opnd->object, offset);
extra += 2;
setpcodeflags(opnd->flags);
} else {
// first two bytes with two byte stores
tmpReg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_RLWINM, tmpReg, srcReg, 8, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + 1);
extra += 2;
setpcodeflags(opnd->flags);
}
if (len == 3) {
// third byte is always stored on its own
emitpcode(PC_RLWINM, tmpReg, srcReg, 24, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + extra);
setpcodeflags(opnd->flags);
}
break;
case 4:
if (align > 2) {
// a single word store straight from srcReg
load_store_register(PC_STW, srcReg, dstReg, opnd->object, offset);
setpcodeflags(opnd->flags);
} else if (align > 1) {
// two halfword stores; the second one stores srcReg directly
tmpReg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 16, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STH, tmpReg, dstReg, opnd->object, offset);
setpcodeflags(opnd->flags);
load_store_register(PC_STH, srcReg, dstReg, opnd->object, offset + 2);
setpcodeflags(opnd->flags);
} else {
// four byte stores; the last one stores srcReg directly
tmpReg = used_virtual_registers[RegClass_GPR]++;
emitpcode(PC_RLWINM, tmpReg, srcReg, 8, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, tmpReg, srcReg, 16, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + 1);
setpcodeflags(opnd->flags);
emitpcode(PC_RLWINM, tmpReg, srcReg, 24, 24, 31);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, tmpReg, dstReg, opnd->object, offset + 2);
setpcodeflags(opnd->flags);
load_store_register(PC_STB, srcReg, dstReg, opnd->object, offset + 3);
setpcodeflags(opnd->flags);
}
break;
}
}
/*
 * Stores a small block held in `srcReg` to the memory location described by
 * `dstOpnd`. The number of bytes written is type->size. When misaligned
 * accesses are enabled the alignment passed on is 4 regardless of `align`.
 */
void store_small_block_from_reg(short srcReg, Operand *dstOpnd, Type *type, SInt32 align) {
    short addrReg;
    SInt32 effectiveAlign;

    coerce_to_addressable(dstOpnd);

    // Indexed addressing is rejected up front
    if (dstOpnd->optype == OpndType_IndirectGPR_Indexed) {
        CError_FATAL(839);
        addrReg = used_virtual_registers[RegClass_GPR]++;
        load_address(addrReg, dstOpnd);
        dstOpnd->optype = OpndType_IndirectGPR_ImmOffset;
        dstOpnd->reg = addrReg;
        dstOpnd->object = NULL;
        dstOpnd->immOffset = 0;
    }

    effectiveAlign = copts.misaligned_mem_access ? 4 : align;

    store_word_of_small_struct(
        srcReg, dstOpnd->reg, dstOpnd,
        dstOpnd->immOffset, type->size, effectiveAlign);
}
/*
 * Stores a small block held in the register pair (`srcRegLo`, `srcRegHi`) to
 * the memory location described by `dstOpnd`. The low part (type->size - 4
 * bytes, at low_offset) is written first, then the 4-byte high part (at
 * high_offset). When misaligned accesses are enabled the alignment passed on
 * is 4 regardless of `align`.
 */
void store_small_block_from_reg_pair(short srcRegLo, short srcRegHi, Operand *dstOpnd, Type *type, SInt32 align) {
    short addrReg;
    SInt32 effectiveAlign;

    coerce_to_addressable(dstOpnd);

    // Indexed addressing is rejected up front
    if (dstOpnd->optype == OpndType_IndirectGPR_Indexed) {
        CError_FATAL(860);
        addrReg = used_virtual_registers[RegClass_GPR]++;
        load_address(addrReg, dstOpnd);
        dstOpnd->optype = OpndType_IndirectGPR_ImmOffset;
        dstOpnd->reg = addrReg;
        dstOpnd->object = NULL;
        dstOpnd->immOffset = 0;
    }

    effectiveAlign = copts.misaligned_mem_access ? 4 : align;

    store_word_of_small_struct(
        srcRegLo, dstOpnd->reg, dstOpnd,
        dstOpnd->immOffset + low_offset, type->size - 4, effectiveAlign);
    store_word_of_small_struct(
        srcRegHi, dstOpnd->reg, dstOpnd,
        dstOpnd->immOffset + high_offset, 4, effectiveAlign);
}

View File

@@ -0,0 +1,518 @@
#include "compiler/Switch.h"
#include "compiler/CError.h"
#include "compiler/CFunc.h"
#include "compiler/CInt64.h"
#include "compiler/CParser.h"
#include "compiler/InstrSelection.h"
#include "compiler/ObjGenMachO.h"
#include "compiler/Operands.h"
#include "compiler/PCode.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/RegisterInfo.h"
#include "compiler/TOC.h"
#include "compiler/CompilerTools.h"
#include "compiler/objects.h"
// List of jump-table objects: create_switch_table() pushes onto it and
// dumpswitchtables() walks it.
ObjectList *switchtables;
// Array of ncases case pointers, sorted by `min` in build_case_ranges()
static SwitchCase **caselabels;
// Consecutive value ranges, each with a target label, built by build_case_ranges()
static CaseRange *caseranges;
// Number of cases in the switch being processed (counted in switchstatement())
static SInt32 ncases;
// Index of the last entry in caseranges
static SInt32 nranges_minus1;
// Smallest / largest selector value considered, chosen from the selector type
static CInt64 min;
static CInt64 max;
// Case value that `range` is measured from (set in build_case_ranges())
static CInt64 first;
// Register(s) holding the selector while a compare tree is generated
static short selector_gpr;
static short selector_gprHi;
static Type *selector_type;
// PCode label of the switch's default target
static PCodeLabel *defaultlabel;
// Most recently accepted case value minus `first`; sizes the jump table
static CInt64 range;
/*
 * qsort() comparator for an array of SwitchCase pointers: orders the cases
 * by ascending `min` value.
 */
static int compare_cases(const void *a, const void *b) {
    const SwitchCase *lhs = *(const SwitchCase **) a;
    const SwitchCase *rhs = *(const SwitchCase **) b;

    if (CInt64_Less(lhs->min, rhs->min))
        return -1;
    return CInt64_Greater(lhs->min, rhs->min) ? 1 : 0;
}
/*
 * Turns the linked list `cases` into the sorted array `caselabels` and the
 * array `caseranges`, which partitions [min, max] into consecutive ranges,
 * each carrying the PCode label to jump to. Values not covered by any case
 * map to `label` (the default label).
 *
 * Also sets the file-level `min`, `max`, `first`, `range` and
 * `nranges_minus1`. Expects `ncases` to already hold the number of cases.
 */
static void build_case_ranges(Type *type, SwitchCase *cases, CLabel *label) {
SwitchCase **caseptr;
SInt32 i;
SwitchCase *curcase;
CaseRange *currange;
// Pick the value bounds from the selector type's size (and signedness for
// types narrower than 4 bytes)
if (type->size == 8) {
min.lo = 0;
min.hi = 0x80000000;
max.lo = 0xFFFFFFFF;
max.hi = 0x7FFFFFFF;
} else if (type->size == 4) {
CInt64_SetLong(&min, 0x80000000);
CInt64_SetLong(&max, 0x7FFFFFFF);
} else if (is_unsigned(type)) {
min.hi = 0;
min.lo = 0;
max.hi = 0;
max.lo = 0xFFFF;
} else {
CInt64_SetLong(&min, -0x8000);
CInt64_SetLong(&max, 0x7FFF);
}
// Flatten the case list into an array so it can be sorted
caselabels = lalloc(sizeof(SwitchCase *) * ncases);
caseptr = caselabels;
while (cases) {
*caseptr = cases;
cases = cases->next;
++caseptr;
}
caseranges = lalloc(((ncases * 2) + 2) * sizeof(CaseRange));
// For selectors narrower than 8 bytes, rebuild each case value from its low word
if (type->size < 8) {
for (i = 0; i < ncases; i++)
CInt64_SetLong(&caselabels[i]->min, caselabels[i]->min.lo);
}
qsort(caselabels, ncases, sizeof(SwitchCase *), &compare_cases);
// Start with one range covering everything and pointing at the default label
currange = caseranges;
currange->min = min;
currange->range = CInt64_Sub(max, min);
currange->label = label->pclabel;
for (i = 0; i < ncases; i++) {
curcase = caselabels[i];
// Cases outside [min, max] are ignored
if (CInt64_GreaterEqual(curcase->min, min) && CInt64_LessEqual(curcase->min, max)) {
if (CInt64_Equal(currange->min, min))
first = curcase->min;
range = CInt64_Sub(curcase->min, first);
if (CInt64_Greater(curcase->min, currange->min)) {
// There is a gap before this case: close the current range just below
// the case value and start a new range at it
currange->range = CInt64_Sub(CInt64_Sub(curcase->min, currange->min), cint64_one);
(++currange)->min = curcase->min;
} else if (CInt64_Greater(currange->min, min) && curcase->label->pclabel == currange[-1].label) {
// Case is adjacent to the previous range and shares its label: grow the
// previous range by one instead of creating a new one
currange[-1].range = CInt64_Add(currange[-1].range, cint64_one);
if (CInt64_Equal(currange->range, cint64_zero)) {
currange--;
} else {
currange->min = CInt64_Add(currange->min, cint64_one);
currange->range = CInt64_Sub(currange->range, cint64_one);
}
continue;
}
// The current range becomes a single-value range for this case
currange->range = cint64_zero;
currange->label = curcase->label->pclabel;
if (CInt64_Less(curcase->min, max)) {
// Open a default-label range covering everything above this case
currange++;
currange->min = CInt64_Add(curcase->min, cint64_one);
currange->range = CInt64_Sub(max, currange->min);
currange->label = label->pclabel;
}
}
}
nranges_minus1 = currange - caseranges;
}
/*
 * Recursively emits a compare-and-branch tree over caseranges[start..end]
 * for a selector of at most 4 bytes held in selector_gpr.
 *
 * A pivot range is chosen near the middle, the selector is compared against
 * the pivot's minimum, and branches are emitted to the labels of the ranges
 * on either side, recursing into sub-intervals that still contain more than
 * one range.
 */
static void treecompare(SInt32 start, SInt32 end) {
SInt32 r30;
SInt32 r29;
CaseRange *currange;
int count;
count = end - start;
CError_ASSERT(175, selector_type->size <= 4);
// r29 = index of the pivot range; may be shifted down by one below so that
// the pivot is a single-value range
r29 = start + (count >> 1) + 1;
currange = caseranges + r29;
if (CInt64_Equal(currange[-1].range, cint64_zero) && (!(count & 1) || (CInt64_NotEqual(currange->range, cint64_zero) && count > 1))) {
currange--;
r29--;
}
// r30 = last index of the lower half
r30 = r29 - 1;
// Compare the selector with the pivot minimum, using an immediate form when
// the value fits and a register compare otherwise
if (selector_type->size < 4 && is_unsigned(selector_type)) {
emitpcode(PC_CMPLI, 0, selector_gpr, CInt64_GetULong(&currange->min));
} else if (FITS_IN_SHORT((SInt32) CInt64_GetULong(&currange->min))) {
emitpcode(PC_CMPI, 0, selector_gpr, CInt64_GetULong(&currange->min));
} else {
SInt32 value = CInt64_GetULong(&currange->min);
int reg = ALLOC_GPR();
load_immediate(reg, value);
emitpcode(PC_CMP, 0, selector_gpr, reg);
}
// A single-value pivot is dispatched directly on equality and then excluded
// from the upper half
if (CInt64_Equal(currange->range, cint64_zero) && r29 < end) {
branch_conditional(0, EEQU, 1, currange->label);
r29++;
}
if (r29 == end) {
// Upper half is the single range `end`
if (start == r30) {
// Lower half is the single range `start`
if (caseranges[start].label == caseranges[end].label) {
branch_always(caseranges[start].label);
} else {
branch_conditional(0, EGREATEREQU, 1, caseranges[end].label);
branch_always(caseranges[start].label);
}
} else {
branch_conditional(0, EGREATEREQU, 1, caseranges[end].label);
treecompare(start, r30);
}
} else {
if (start == r30) {
// Lower half is the single range `start`; recurse into the upper half
branch_conditional(0, ELESS, 1, caseranges[start].label);
treecompare(r29, end);
} else {
// Both halves need further comparisons
PCodeLabel *label = makepclabel();
branch_conditional(0, EGREATEREQU, 1, label);
treecompare(start, r30);
branch_label(label);
treecompare(r29, end);
}
}
}
/*
 * 8-byte-selector counterpart of treecompare(): recursively emits a
 * compare-and-branch tree over caseranges[start..end] for a selector held in
 * the register pair selector_gpr (low word) / selector_gprHi (high word).
 *
 * Equality against a single-value pivot is tested with XOR/XOR/OR/CMPI.
 * Ordering is tested with a SUBFC/SUBFE/SUBFE/NEG sequence whose result is
 * compared with 0. Unless the selector type is IT_ULONGLONG, both high words
 * are first XORed with 0x8000 (via XORIS) - presumably to turn the signed
 * comparison into an unsigned one; TODO confirm.
 *
 * NOTE(review): every type test below repeats the same `!= IT_ULONGLONG`
 * operand twice; the second operand is redundant as written - confirm whether
 * a different integral kind was meant.
 */
static void I8_treecompare(SInt32 start, SInt32 end) {
SInt32 r30;
SInt32 r29;
CaseRange *currange;
int count;
count = end - start;
// Pivot selection is identical to treecompare()
r29 = start + (count >> 1) + 1;
currange = caseranges + r29;
if (CInt64_Equal(currange[-1].range, cint64_zero) && (!(count & 1) || (CInt64_NotEqual(currange->range, cint64_zero) && count > 1))) {
currange--;
r29--;
}
r30 = r29 - 1;
// Single-value pivot: branch to its label when both words match
if (CInt64_Equal(currange->range, cint64_zero) && r29 < end) {
short a = ALLOC_GPR();
short b = ALLOC_GPR();
load_immediate(a, currange->min.lo);
load_immediate(b, currange->min.hi);
emitpcode(PC_XOR, a, selector_gpr, a);
emitpcode(PC_XOR, b, selector_gprHi, b);
emitpcode(PC_OR, b, a, b);
emitpcode(PC_CMPI, 0, b, 0);
branch_conditional(0, EEQU, 1, currange->label);
r29++;
}
if (r29 == end) {
if (start == r30) {
if (caseranges[start].label == caseranges[end].label) {
branch_always(caseranges[start].label);
} else {
short a = ALLOC_GPR();
short b = ALLOC_GPR();
short c = ALLOC_GPR();
short d = ALLOC_GPR();
load_immediate(a, currange->min.lo);
load_immediate(b, currange->min.hi);
if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) {
emitpcode(PC_XORIS, c, selector_gprHi, 0x8000);
emitpcode(PC_XORIS, d, b, 0x8000);
} else {
c = selector_gprHi;
d = b;
}
emitpcode(PC_SUBFC, a, a, selector_gpr);
emitpcode(PC_SUBFE, b, d, c);
emitpcode(PC_SUBFE, b, a, a);
emitpcode(PC_NEG, b, b);
emitpcode(PC_CMPI, 0, b, 0);
branch_conditional(0, EEQU, 1, caseranges[end].label);
branch_always(caseranges[start].label);
}
} else {
short a = ALLOC_GPR();
short b = ALLOC_GPR();
short c = ALLOC_GPR();
short d = ALLOC_GPR();
load_immediate(a, currange->min.lo);
load_immediate(b, currange->min.hi);
if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) {
emitpcode(PC_XORIS, c, selector_gprHi, 0x8000);
emitpcode(PC_XORIS, d, b, 0x8000);
} else {
c = selector_gprHi;
d = b;
}
emitpcode(PC_SUBFC, a, a, selector_gpr);
emitpcode(PC_SUBFE, b, d, c);
emitpcode(PC_SUBFE, b, a, a);
emitpcode(PC_NEG, b, b);
emitpcode(PC_CMPI, 0, b, 0);
branch_conditional(0, EEQU, 1, caseranges[end].label);
I8_treecompare(start, r30);
}
} else {
if (start == r30) {
short a = ALLOC_GPR();
short b = ALLOC_GPR();
short c = ALLOC_GPR();
short d = ALLOC_GPR();
load_immediate(a, currange->min.lo);
load_immediate(b, currange->min.hi);
if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) {
emitpcode(PC_XORIS, c, selector_gprHi, 0x8000);
emitpcode(PC_XORIS, d, b, 0x8000);
} else {
c = selector_gprHi;
d = b;
}
// This is the only sequence with the subtraction operands swapped.
// NOTE(review): the matching branch in treecompare() jumps to
// caseranges[start].label on "less than"; here the target is
// caseranges[end].label - verify against the reference output.
emitpcode(PC_SUBFC, a, selector_gpr, a);
emitpcode(PC_SUBFE, b, c, d);
emitpcode(PC_SUBFE, b, a, a);
emitpcode(PC_NEG, b, b);
emitpcode(PC_CMPI, 0, b, 0);
branch_conditional(0, ENOTEQU, 1, caseranges[end].label);
I8_treecompare(r29, end);
} else {
PCodeLabel *label;
short a = ALLOC_GPR();
short b = ALLOC_GPR();
short c = ALLOC_GPR();
short d = ALLOC_GPR();
load_immediate(a, currange->min.lo);
load_immediate(b, currange->min.hi);
if (TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG && TYPE_INTEGRAL(selector_type)->integral != IT_ULONGLONG) {
emitpcode(PC_XORIS, c, selector_gprHi, 0x8000);
emitpcode(PC_XORIS, d, b, 0x8000);
} else {
c = selector_gprHi;
d = b;
}
emitpcode(PC_SUBFC, a, a, selector_gpr);
emitpcode(PC_SUBFE, b, d, c);
emitpcode(PC_SUBFE, b, a, a);
emitpcode(PC_NEG, b, b);
emitpcode(PC_CMPI, 0, b, 0);
// Both halves need further comparisons: branch over the lower half's code
label = makepclabel();
branch_conditional(0, EEQU, 1, label);
I8_treecompare(start, r30);
branch_label(label);
I8_treecompare(r29, end);
}
}
}
/*
 * Evaluates the switch selector `expr` into register(s), records them in the
 * file-level selector_* variables and emits the compare tree covering all of
 * caseranges[0..nranges_minus1]. 8-byte selectors go through
 * I8_treecompare(), everything else through treecompare().
 */
static void generate_tree(ENode *expr) {
    Operand selector;

    memclrw(&selector, sizeof(Operand));

    if (TYPE_IS_8BYTES(expr->rtype)) {
        GEN_NODE(expr, &selector);
        coerce_to_register_pair(&selector, expr->rtype, 0, 0);

        selector_type = expr->rtype;
        selector_gpr = selector.reg;
        selector_gprHi = selector.regHi;

        I8_treecompare(0, nranges_minus1);
        return;
    }

    GEN_NODE(expr, &selector);
    // Selectors narrower than a word are widened before comparing
    if (expr->rtype->size < 4)
        extend32(&selector, expr->rtype, 0);
    ENSURE_GPR(&selector, expr->rtype, 0);

    selector_type = expr->rtype;
    selector_gpr = selector.reg;

    treecompare(0, nranges_minus1);
}
static Object *create_switch_table(void) {
Object *obj;
ObjectList *list;
UInt32 *outptr;
CaseRange *currange;
SInt32 size;
CInt64 value;
obj = galloc(sizeof(Object));
list = galloc(sizeof(ObjectList));
memclrw(obj, sizeof(Object));
memclrw(list, sizeof(ObjectList));
obj->otype = OT_OBJECT;
obj->access = ACCESSPUBLIC;
obj->datatype = DDATA;
obj->name = CParser_GetUniqueName();
obj->toc = NULL;
obj->sclass = TK_STATIC;
obj->qual = Q_CONST;
obj->flags |= OBJECT_FLAGS_2 | OBJECT_DEFINED;
obj->u.data.linkname = obj->name;
obj->type = NULL;
createIndirect(obj, 0, 0);
obj->type = TYPE(&void_ptr);
size = CInt64_GetULong(&range) + 1;
obj->u.data.u.switchtable.size = size;
obj->u.data.u.switchtable.data = lalloc(4 * size);
currange = caseranges;
outptr = (UInt32 *) obj->u.data.u.switchtable.data;
value = cint64_zero;
while (CInt64_LessEqual(value, range)) {
while (CInt64_Greater(CInt64_Add(first, value), CInt64_Add(currange->min, currange->range)))
currange++;
*outptr = CTool_CreateIndexFromPointer(currange->label);
value = CInt64_Add(value, cint64_one);
outptr++;
}
list->object = obj;
list->next = switchtables;
switchtables = list;
return list->object;
}
/*
 * Emits a jump-table dispatch for a switch whose selector is not 8 bytes wide:
 * builds the table object, evaluates the selector, rebases it by -first,
 * bounds-checks it against `range` (out-of-range values go to defaultlabel),
 * loads the table entry and branches through CTR.
 */
static void generate_table(ENode *expr, SwitchInfo *info) {
Object *table;
SwitchCase *curcase;
short reg;
short reg2;
short reg3;
Operand op1;
Operand op2;
CInt64 val3 = {0, 3};
memclrw(&op1, sizeof(Operand));
memclrw(&op2, sizeof(Operand));
// When `first` lies strictly between 0 and val3, start the table at 0
// instead so that no rebasing instruction is needed
if (CInt64_Greater(first, cint64_zero) && CInt64_Less(first, val3)) {
range = CInt64_Add(range, first);
first = cint64_zero;
}
table = create_switch_table();
CError_ASSERT(553, !TYPE_IS_8BYTES(expr->rtype));
GEN_NODE(expr, &op1);
if (expr->rtype->size < 4)
extend32(&op1, expr->rtype, 0);
ENSURE_GPR(&op1, expr->rtype, 0);
reg = op1.reg;
// reg = selector - first (as ADDIS/ADDI or a single ADDI)
if (CInt64_NotEqual(first, cint64_zero)) {
SInt32 value;
reg = ALLOC_GPR();
value = -CInt64_GetULong(&first);
if (!FITS_IN_SHORT(value)) {
emitpcode(PC_ADDIS, reg, op1.reg, 0, HIGH_PART(value));
if (value)
emitpcode(PC_ADDI, reg, reg, 0, LOW_PART(value));
} else {
emitpcode(PC_ADDI, reg, op1.reg, 0, value);
}
}
// Logical compare against `range`: anything above goes to the default label
if (!FITS_IN_SHORT(CInt64_GetULong(&range))) {
short tmp = ALLOC_GPR();
load_immediate(tmp, CInt64_GetULong(&range));
emitpcode(PC_CMPL, 0, reg, tmp);
} else {
emitpcode(PC_CMPLI, 0, reg, CInt64_GetULong(&range));
}
branch_conditional(0, EGREATER, 1, defaultlabel);
// Form the table's address, through its TOC entry when it has one
if (table->toc) {
op2.optype = OpndType_Symbol;
op2.object = table->toc;
indirect(&op2, NULL);
} else {
op2.optype = OpndType_Symbol;
op2.object = table;
}
// NOTE(review): reg2 is only assigned inside this `if`; if op2 could already
// be OpndType_GPR here, the uses of reg2 below would read an uninitialised
// value - confirm that indirect() never yields a GPR operand.
if (op2.optype != OpndType_GPR) {
Coerce_to_register(&op2, TYPE(&void_ptr), reg2 = ALLOC_GPR());
}
if (op2.optype != OpndType_GPR) {
CError_FATAL(599);
} else {
if (op2.reg != reg2)
emitpcode(PC_MR, reg2, op2.reg);
}
// Scale the index by 4 (rotate left by 2 with a mask)
if (CInt64_Equal(first, cint64_zero)) {
reg = ALLOC_GPR();
emitpcode(PC_RLWINM, reg, op1.reg, 2, 0, 29);
} else {
emitpcode(PC_RLWINM, reg, reg, 2, 0, 29);
}
// Load the table entry into the register that held the table address
reg3 = reg2;
emitpcode(PC_LWZX, reg3, reg3, reg);
// Register every case label and the default label with pcbranch() for the current block
for (curcase = info->cases; curcase; curcase = curcase->next)
pcbranch(pclastblock, curcase->label->pclabel);
pcbranch(pclastblock, info->defaultlabel->pclabel);
emitpcode(PC_MTCTR, reg3);
branch_indirect(table);
}
/*
 * Generates code for a switch statement on `expr`.
 *
 * Ensures every case label and the default label has a PCode label, builds
 * the case ranges, then emits either a compare tree or a jump table. The
 * tree is used for 8-byte selectors, when switch tables are disabled, when
 * there are fewer than 8 ranges, or when the table would be sparse relative
 * to the number of ranges.
 */
void switchstatement(ENode *expr, SwitchInfo *info) {
    Boolean use_table;
    SwitchCase *swcase;

    use_table = copts.switch_tables;

    // Count the cases, creating PCode labels on demand
    ncases = 0;
    swcase = info->cases;
    while (swcase) {
        if (!swcase->label->pclabel)
            swcase->label->pclabel = makepclabel();
        ncases++;
        swcase = swcase->next;
    }

    CError_ASSERT(656, ncases >= 0 && ncases <= 0x3333332U);

    if (!info->defaultlabel->pclabel)
        info->defaultlabel->pclabel = makepclabel();
    defaultlabel = info->defaultlabel->pclabel;

    build_case_ranges(expr->rtype, info->cases, info->defaultlabel);

    if (TYPE_IS_8BYTES(expr->rtype)
        || !use_table
        || nranges_minus1 < 8
        || (nranges_minus1 * 2) < ((range.lo / 2) + 4)) {
        generate_tree(expr);
    } else {
        generate_table(expr, info);
    }
}
/*
 * Finalises every jump table recorded in `switchtables`: each entry, which
 * holds a label index, is replaced by the endian-converted code offset of the
 * block that label belongs to, and the table is then handed to
 * ObjGen_DeclareSwitchTable() together with `funcobj`.
 */
void dumpswitchtables(Object *funcobj) {
    ObjectList *entry;

    for (entry = switchtables; entry; entry = entry->next) {
        Object *table = entry->object;
        SInt32 remaining;
        UInt32 *slot;

        CError_ASSERT(694, table->otype == OT_OBJECT && table->access == ACCESSPUBLIC && table->datatype == DDATA);

        slot = (UInt32 *) table->u.data.u.switchtable.data;
        for (remaining = table->u.data.u.switchtable.size; remaining != 0; remaining--, slot++) {
            *slot = CTool_EndianConvertWord32(((PCodeLabel *) CTool_ResolveIndexToPointer(*slot))->block->codeOffset);
        }

        ObjGen_DeclareSwitchTable(table, funcobj);
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,747 @@
#include "compiler/Alias.h"
#include "compiler/CClass.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/CMachine.h"
#include "compiler/CodeGen.h"
#include "compiler/CopyPropagation.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/RegisterInfo.h"
#include "compiler/UseDefChains.h"
#include "compiler/ValueNumbering.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/objects.h"
#include "compiler/types.h"
// Head of the list of every Alias created; create_alias() pushes at the front
static Alias *aliases;
// Number of aliases created so far; also supplies each new alias's index
static int n_aliases;
// Reset to 0 by initialize_aliases()
static int n_gathered_aliases;
// Hash buckets, chained through Alias::hashNext and indexed by hash_alias()
static Alias *alias_hash[997];
// Alias set created by initialize_aliases(); make_alias() adds to it every
// object that is neither DLOCAL/DNONLAZYPTR nor a "safe const"
Alias *worst_case;
// Placeholder object (named "@worst_case@") that is the first member of worst_case
Object worst_case_obj;
// Type given to worst_case_obj; presumably an array of stchar of size
// 0xFFFFFF - TODO confirm the TypePointer field order
static TypePointer worst_case_memory_type = {
TYPEARRAY,
0xFFFFFF,
TYPE(&stchar)
};
/*
 * Decides whether `obj` can be treated as a constant for alias purposes.
 * Array types are looked through to their element type. Register-sized,
 * vector, float and struct objects qualify when the object is const; class
 * objects additionally require a POD class. Everything else is rejected.
 */
static Boolean is_safe_const(Object *obj) {
    Type *elem;

    // Peel off any array levels
    for (elem = obj->type; IS_TYPE_ARRAY(elem); elem = TPTR_TARGET(elem))
        ;

    if (TYPE_FITS_IN_REGISTER(elem) || IS_TYPE_VECTOR(elem) || IS_TYPE_FLOAT(elem) || IS_TYPE_STRUCT(elem))
        return is_const_object(obj);

    if (!IS_TYPE_CLASS(elem))
        return 0;

    return is_const_object(obj) && CClass_IsPODClass(TYPE_CLASS(elem));
}
/*
 * Resets all alias bookkeeping: rebuilds the placeholder worst-case object,
 * empties the alias list and hash table, then creates the `worst_case` alias
 * set with the placeholder object's alias as its first member.
 */
void initialize_aliases(void) {
    int bucket;

    memclrw(&worst_case_obj, sizeof(Object));
    worst_case_obj.otype = OT_OBJECT;
    worst_case_obj.type = TYPE(&worst_case_memory_type);
    worst_case_obj.datatype = DDATA;
    worst_case_obj.name = GetHashNameNodeExport("@worst_case@");

    aliases = NULL;
    n_aliases = 0;
    n_gathered_aliases = 0;

    bucket = 0;
    while (bucket < 997) {
        alias_hash[bucket] = NULL;
        bucket++;
    }

    worst_case = make_alias_set();
    add_alias_member(worst_case, make_alias(&worst_case_obj, 0, 0));
}
/*
 * Bucket index (0..996) for an (object, offset, size) triple: the product of
 * the object's name hash, the offset and the size, reduced modulo the table
 * size.
 */
static UInt32 hash_alias(Object *object, SInt32 offset, SInt32 size) {
    UInt32 product = (UInt32) (object->name->hashval * offset * size);

    return product % 997;
}
/*
 * Allocates and zero-fills a new Alias, gives it the next index, links it at
 * the head of the global alias list and, when `addToHash` is set, at the
 * head of its hash bucket. Returns the new alias.
 */
static Alias *create_alias(AliasType type, Object *object, SInt32 offset, SInt32 size, Boolean addToHash) {
    Alias *fresh;
    UInt32 bucket;

    fresh = lalloc(sizeof(Alias));
    memclrw(fresh, sizeof(Alias));

    fresh->type = type;
    fresh->index = n_aliases++;

    // Push onto the list of all aliases
    fresh->next = aliases;
    aliases = fresh;

    fresh->object = object;
    fresh->offset = offset;
    fresh->size = size;

    if (!addToHash)
        return fresh;

    bucket = hash_alias(object, offset, size);
    fresh->hashNext = alias_hash[bucket];
    alias_hash[bucket] = fresh;
    return fresh;
}
/*
 * Find the hashed alias that exactly matches (object, offset, size).
 * Returns NULL when no such alias has been created yet.
 */
static Alias *lookup_alias(Object *object, SInt32 offset, SInt32 size) {
    Alias *entry;

    entry = alias_hash[hash_alias(object, offset, size)];
    while (entry) {
        if (entry->object == object && entry->offset == offset && entry->size == size)
            return entry;
        entry = entry->hashNext;
    }
    return NULL;
}
/*
 * Return the alias for `size` bytes at `offset` within `object`, creating and
 * registering it on first use.
 *
 * Passing offset == 0 and size == 0 is shorthand for "the whole object".
 * A newly created alias that does not cover the whole object is AliasType1 and
 * is linked as a member of the object's whole-object alias (AliasType0).
 * Unless the object is DLOCAL/DNONLAZYPTR or a safe constant, its whole-object
 * alias is also added to worst_case when the alias is first created.
 *
 * Returns NULL when offset is greater than the object's type size; note that
 * the alias has already been created and registered by then.
 */
Alias *make_alias(Object *object, SInt32 offset, SInt32 size) {
Alias *alias;
Alias *alias2;
if (!offset && !size)
size = object->type->size;
alias = lookup_alias(object, offset, size);
if (!alias) {
if (offset > 0 || size != object->type->size) {
/* partial view: make sure the whole-object alias exists, then hang this one under it */
alias2 = make_alias(object, 0, object->type->size);
alias = create_alias(AliasType1, object, offset, size, 1);
add_alias_member(alias2, alias);
} else {
alias = create_alias(AliasType0, object, offset, size, 1);
}
/* the alias is in the hash table by now, so the recursive make_alias below finds it */
switch (object->datatype) {
case DLOCAL:
case DNONLAZYPTR:
break;
default:
if (!is_safe_const(object))
add_alias_member(worst_case, make_alias(object, 0, 0));
}
}
if (offset > object->type->size)
return NULL;
else
return alias;
}
/*
 * Create an empty alias set (AliasType2).
 * Sets have no object and are never entered into alias_hash.
 */
Alias *make_alias_set(void) {
    Alias *set;

    set = create_alias(AliasType2, NULL, 0, 0, 0);
    return set;
}
/*
 * Make `child` a member of `parent`.
 *
 * - If child is itself an alias set, each of its members is added instead
 *   (sets are flattened, never nested).
 * - When adding a partial-object alias (AliasType1) to worst_case, the
 *   object's whole-object alias is added in its place.
 * - Adding a member that is already present is a no-op.
 *
 * Each membership is recorded in one AliasMember node that is linked both on
 * parent->parents (the parent's member list) and on child->children (the list
 * of containers the child belongs to).
 */
void add_alias_member(Alias *parent, Alias *child) {
    AliasMember *link;

    if (child->type == AliasType2) {
        link = child->parents;
        while (link) {
            add_alias_member(parent, link->child);
            link = link->nextParent;
        }
        return;
    }

    if (parent == worst_case && child->type == AliasType1)
        child = make_alias(child->object, 0, 0);

    /* already a member? */
    link = parent->parents;
    while (link) {
        if (link->child == child)
            return;
        link = link->nextParent;
    }

    link = lalloc(sizeof(AliasMember));
    link->parent = parent;
    link->child = child;

    link->nextParent = parent->parents;
    parent->parents = link;

    link->nextChild = child->children;
    child->children = link;
}
/*
 * Unimplemented stub: raises an internal compiler error via CError_FATAL.
 * The NULL return is only reached if CError_FATAL returns.
 */
Alias *make_alias_set_from_IR(void) {
CError_FATAL(333);
return NULL;
}
/*
 * Tell whether two aliases overlap, judged on offset/size alone; the caller
 * is responsible for checking that both refer to the same object.
 *
 * Equal offsets always count as overlapping. Otherwise the alias that starts
 * later overlaps if it starts before the end of the earlier one.
 */
static Boolean aliases_overlap(Alias *a, Alias *b) {
    if (a->offset == b->offset)
        return 1;

    if (a->offset > b->offset)
        return a->offset < (b->offset + b->size);

    return b->offset < (a->offset + a->size);
}
/*
 * Propagation predicate: does pcode produce an address worth tracking?
 *
 * Returns 1 for:
 *   - ADD,
 *   - the update-form loads and stores listed below,
 *   - LWZ whose memory operand is a DNONLAZYPTR object,
 *   - ADDI/ADDIS whose destination is not one of the real GPRs.
 * Returns 0 for everything else.
 *
 * Side effect: an ADDI/ADDIS into a real GPR with a memory operand naming a
 * DLOCAL object that is not a safe constant adds that object's whole-object
 * alias to worst_case.
 */
static int is_address_load(PCode *pcode) {
    Object *target;

    switch (pcode->op) {
        case PC_ADD:
        case PC_LBZU:
        case PC_LBZUX:
        case PC_LHZU:
        case PC_LHZUX:
        case PC_LHAU:
        case PC_LHAUX:
        case PC_LWZU:
        case PC_LWZUX:
        case PC_STBU:
        case PC_STBUX:
        case PC_STHU:
        case PC_STHUX:
        case PC_STWU:
        case PC_STWUX:
            return 1;

        case PC_LWZ:
            if (pcode->args[2].kind != PCOp_MEMORY)
                return 0;
            if (pcode->args[2].data.mem.obj->datatype != DNONLAZYPTR)
                return 0;
            return 1;

        case PC_ADDI:
        case PC_ADDIS:
            if (pcode->args[0].data.reg.reg >= n_real_registers[RegClass_GPR])
                return 1;
            if (pcode->args[2].kind == PCOp_MEMORY) {
                target = pcode->args[2].data.mem.obj;
                if (target->datatype == DLOCAL && !is_safe_const(target))
                    add_alias_member(worst_case, make_alias(target, 0, 0));
            }
            return 0;

        default:
            return 0;
    }
}
static int addresspropagatestouse(int candidateID, int useID) {
PCode *candidate_pcode; // r30
PCode *use_pcode; // r29
int reg; // r28
short reg2;
Object *object; // r27
SInt32 offset; // r26
Alias *alias; // r25
Boolean flag24; // r24
SInt32 size; // r23
Alias *aliasSet; // r22
int i;
PCode *scan;
PCodeArg *op;
candidate_pcode = Candidates[candidateID].pcode;
use_pcode = Uses[useID].pcode;
flag24 = 0;
size = 1;
reg = candidate_pcode->args[0].data.reg.reg;
if (candidate_pcode->alias && (candidate_pcode->alias->type == AliasType0 || candidate_pcode->alias->type == AliasType1)) {
object = candidate_pcode->alias->object;
offset = candidate_pcode->alias->offset;
if (offset == 0 && candidate_pcode->alias->size == object->type->size)
flag24 = 1;
} else if (candidate_pcode->args[2].kind == PCOp_MEMORY) {
object = candidate_pcode->args[2].data.mem.obj;
if (candidate_pcode->op == PC_ADDIS)
offset = candidate_pcode->args[2].data.mem.offset << 16;
else
offset = candidate_pcode->args[2].data.mem.offset;
} else {
return 0;
}
CError_ASSERT(478, object->otype == OT_OBJECT);
if ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) {
reg = candidate_pcode->args[1].data.reg.reg;
offset = 0;
flag24 = 1;
} else if (candidate_pcode->op == PC_LWZ) {
if (object->datatype != DNONLAZYPTR)
return 0;
object = object->u.var.realObj;
CError_ASSERT(495, object->otype == OT_OBJECT);
offset = 0;
} else if (candidate_pcode->op == PC_ADDI) {
if (!candidate_pcode->alias && object)
candidate_pcode->alias = make_alias(object, offset, 1);
} else if (candidate_pcode->op == PC_ADDIS) {
if (!candidate_pcode->alias && object)
candidate_pcode->alias = make_alias(object, offset, 1);
} else if (candidate_pcode->op == PC_ADD) {
offset = 0;
flag24 = 1;
} else {
CError_FATAL(509);
}
if (
!(use_pcode->flags & (fIsRead | fIsWrite)) &&
use_pcode->op != PC_ADDI &&
use_pcode->op != PC_ADD &&
use_pcode->op != PC_ADDIS
) {
if (object->datatype == DLOCAL && !is_safe_const(object))
add_alias_member(worst_case, make_alias(object, 0, 0));
return 1;
}
if (
(use_pcode->flags & (fIsWrite | fPCodeFlag40000)) &&
use_pcode->args[0].kind == PCOp_REGISTER &&
use_pcode->args[0].arg == RegClass_GPR &&
use_pcode->args[0].data.reg.reg == reg &&
object->datatype == DLOCAL &&
!is_safe_const(object)
)
add_alias_member(worst_case, make_alias(object, 0, 0));
if (use_pcode->argCount < 3)
return 1;
CError_ASSERT(543, use_pcode->args[1].kind == PCOp_REGISTER);
if (candidate_pcode->block == use_pcode->block && precedes(candidate_pcode, use_pcode)) {
for (scan = candidate_pcode->nextPCode; scan && scan != use_pcode; scan = scan->nextPCode) {
for (op = scan->args, i = scan->argCount; i--; op++) {
if (op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg)
return 1;
}
}
} else {
if (!bitvectorgetbit(candidateID, propinfo[use_pcode->block->blockIndex].vec8)) {
if (bitvectorgetbit(candidate_pcode->defID, usedefinfo[use_pcode->block->blockIndex].defvec8)) {
for (scan = use_pcode->block->firstPCode; scan && scan != use_pcode; scan = scan->nextPCode) {
for (op = scan->args, i = scan->argCount; i--; op++) {
if (op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg)
return 1;
}
}
} else {
return 1;
}
}
for (scan = use_pcode->block->firstPCode; scan; scan = scan->nextPCode) {
if (scan == use_pcode)
break;
for (op = scan->args, i = scan->argCount; i--; op++) {
if (op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg)
return 1;
}
}
}
CError_ASSERT(598, object != NULL);
if (use_pcode->op == PC_ADDI || use_pcode->op == PC_ADD || use_pcode->op == PC_ADDIS) {
if (use_pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR] && !is_safe_const(object))
add_alias_member(worst_case, make_alias(object, 0, 0));
}
if (use_pcode->flags & (fIsRead | fIsWrite))
size = nbytes_loaded_or_stored_by(use_pcode);
if (use_pcode->args[2].kind == PCOp_REGISTER) {
if (use_pcode->args[1].data.reg.reg == 0) {
if (use_pcode->args[2].data.reg.reg == reg)
alias = make_alias(object, offset, size);
} else {
if (use_pcode->args[1].data.reg.reg == reg)
reg2 = use_pcode->args[2].data.reg.reg;
else if (use_pcode->args[2].data.reg.reg == reg)
reg2 = use_pcode->args[1].data.reg.reg;
else
return 1;
for (scan = use_pcode->prevPCode; scan; scan = scan->prevPCode) {
if (scan->op == PC_LI && scan->args[0].data.reg.reg == reg2)
break;
for (i = 0; i < scan->argCount; i++) {
if (scan->args[i].kind == PCOp_REGISTER &&
scan->args[i].arg == RegClass_GPR &&
scan->args[i].data.reg.reg == reg2 &&
(scan->args[i].data.reg.effect & EffectWrite)) {
scan = NULL;
break;
}
}
if (!scan)
break;
}
if (scan) {
offset += scan->args[1].data.mem.offset;
alias = make_alias(object, offset, size);
} else {
alias = make_alias(object, 0, 0);
}
}
} else {
if (use_pcode->args[1].kind != PCOp_REGISTER ||
use_pcode->args[1].arg != RegClass_GPR ||
use_pcode->args[1].data.reg.reg != reg)
return 1;
if (use_pcode->args[1].data.reg.effect & EffectWrite) {
alias = make_alias(object, 0, 0);
} else if (use_pcode->args[2].kind == PCOp_IMMEDIATE) {
if (use_pcode->op == PC_ADDIS) {
offset += use_pcode->args[2].data.imm.value << 16;
alias = make_alias(object, offset, 1);
} else {
offset += use_pcode->args[2].data.imm.value;
alias = make_alias(object, offset, size);
}
} else {
return 1;
}
}
if (flag24)
alias = make_alias(object, 0, 0);
if (!alias)
return 1;
if (!use_pcode->alias) {
if (
use_pcode->op == PC_ADDI ||
use_pcode->op == PC_ADD ||
use_pcode->op == PC_ADDIS ||
((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr))
)
recursive_propagation = 1;
}
if (use_pcode->alias) {
if (use_pcode->alias == worst_case) {
add_alias_member(worst_case, make_alias(object, 0, 0));
} else if (use_pcode->alias == alias) {
return 1;
} else if (use_pcode->alias->type == AliasType0 || use_pcode->alias->type == AliasType1) {
if (object == use_pcode->alias->object) {
use_pcode->alias = make_alias(object, 0, 0);
} else {
aliasSet = make_alias_set();
if (
use_pcode->op == PC_ADDI ||
use_pcode->op == PC_ADD ||
use_pcode->op == PC_ADDIS ||
((use_pcode->flags & (fIsRead | fIsWrite)) && (use_pcode->flags & fUpdatesPtr))
) {
if (alias->type == AliasType2)
add_alias_member(worst_case, alias);
else
add_alias_member(worst_case, make_alias(use_pcode->alias->object, 0, 0));
}
add_alias_member(aliasSet, use_pcode->alias);
add_alias_member(aliasSet, alias);
use_pcode->alias = aliasSet;
}
} else {
add_alias_member(use_pcode->alias, alias);
}
} else {
use_pcode->alias = alias;
}
propagated_instructions = 1;
return 1;
}
/*
 * Propagation callback (third entry of alias_prop).
 * Ignores `id` and just sets propagated_instructions.
 */
static void finishpropagatealiases(int id) {
propagated_instructions = 1;
}
/*
 * Descriptor handed to propagateinstructions() for alias propagation:
 * the three callbacks defined above, followed by two name strings, a printf
 * style format for a 32-bit id, and a flag.
 * NOTE(review): the meaning of the strings and of the trailing 1 comes from
 * the Propagation struct declaration, which is not visible here -- confirm.
 */
static Propagation alias_prop = {
&is_address_load,
&addresspropagatestouse,
&finishpropagatealiases,
"ALIAS",
"ALIASES",
"A%" PRId32,
1
};
/*
 * Run alias propagation over `proc` using alias_prop.
 * The third argument to propagateinstructions() is 4 at optimisation level 4
 * or above and 1 otherwise.
 * NOTE(review): presumably an iteration/effort limit -- confirm against
 * propagateinstructions().
 */
static void propagatealiasinfo(Object *proc) {
    int level;

    if (copts.optimizationlevel >= 4)
        level = 4;
    else
        level = 1;

    propagateinstructions(proc, &alias_prop, level, 1);
}
/*
 * Give every memory-touching instruction an alias and (re)build the member
 * bit vectors of all alias sets.
 *
 * With `coloring` set, alias propagation is run first and each aliased memory
 * instruction is then re-checked against the definitions reaching its address
 * registers; anything not backed by a known address load degrades to
 * worst_case. Without `coloring`, every memory instruction or call that has no
 * alias simply gets worst_case.
 *
 * Finally, if new aliases were created since the last call, each alias set
 * gets a fresh bit vector (vec24) marking its members and the members of its
 * members.
 */
void gather_alias_info(void) {
UInt32 *myvec; // r31
Alias *alias; // r22
AliasMember *member;
AliasMember *member2;
PCodeBlock *block; // r21
PCode *pcode; // r20
PCodeArg *op; // r19
RegUseOrDef *list; // r18
int i; // r17
Alias *alias_choice; // r16
int aliases_idx; // r15 (helper in r23)
PCode *defpcode; // r14
Alias *alias_array[3];
UseOrDef *def;
int defID;
if (coloring) {
propagatealiasinfo(gFunction);
/* myvec tracks the set of definitions live at the current instruction */
myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
for (block = pcbasicblocks; block; block = block->nextBlock) {
bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs);
for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) {
if (!pcode->alias) {
pcode->alias = worst_case;
} else {
/* fIsPtrOp is cleared only when the alias is an object alias of exactly the accessed size */
if ((pcode->alias->type == AliasType0 || pcode->alias->type == AliasType1) &&
pcode->alias->size == nbytes_loaded_or_stored_by(pcode)) {
pcode->flags &= ~fIsPtrOp;
} else {
pcode->flags |= fIsPtrOp;
}
if (pcode->alias != worst_case) {
aliases_idx = 0;
alias_choice = NULL;
op = pcode->args;
/* examine each GPR that is read (args[0] is skipped for writing instructions) */
for (i = 0; i < pcode->argCount; i++, op++) {
if (
(!(pcode->flags & (fIsWrite | fPCodeFlag40000)) || op != pcode->args) &&
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectRead)
) {
alias_array[aliases_idx] = NULL;
/* a third address register: give up and use worst_case */
if (aliases_idx >= 2) {
alias_choice = worst_case;
break;
}
alias_array[aliases_idx] = pcode->alias;
/* any reaching definition that is not a known address load spoils this register */
for (list = reg_Defs[RegClass_GPR][op->data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(list->id, myvec)) {
defpcode = Defs[list->id].pcode;
if (!defpcode->alias || !is_address_load(defpcode) || defpcode->alias == worst_case) {
alias_array[aliases_idx] = worst_case;
break;
}
}
}
aliases_idx++;
}
}
if (!alias_choice) {
if (aliases_idx > 0) {
alias_choice = alias_array[0];
if (aliases_idx == 2) {
/* with two registers, exactly one of them must be spoiled for the alias to survive */
if (alias_array[0] != worst_case) {
if (alias_array[1] != worst_case)
alias_choice = worst_case;
} else if (alias_array[1] != worst_case) {
alias_choice = alias_array[1];
}
}
}
if (alias_choice == worst_case) {
/* degrading to worst_case: make sure the old alias is a member of worst_case */
pcode->flags |= fIsPtrOp;
if (pcode->alias->type == AliasType2)
add_alias_member(worst_case, pcode->alias);
else
add_alias_member(worst_case, make_alias(pcode->alias->object, 0, 0));
}
if (alias_choice)
pcode->alias = alias_choice;
}
}
}
} else {
if ((pcode->flags & fIsCall) && !pcode->alias)
pcode->alias = worst_case;
}
/* advance myvec past this instruction: its defs replace earlier defs of the same GPR */
for (def = &Defs[defID = pcode->defID]; defID < number_of_Defs && def->pcode == pcode; defID++, def++) {
if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) {
for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next)
bitvectorclearbit(list->id, myvec);
}
bitvectorsetbit(defID, myvec);
}
}
}
freeoheap();
} else {
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
if ((pcode->flags & (fIsRead | fIsWrite | fIsCall | fPCodeFlag20000 | fPCodeFlag40000)) && !pcode->alias)
pcode->alias = worst_case;
}
}
}
/* rebuild the set bit vectors only if aliases were created since the last call */
if (n_gathered_aliases != n_aliases) {
for (alias = aliases; alias; alias = alias->next) {
if (alias->type == AliasType2) {
alias->vec24 = lalloc(4 * ((n_aliases + 31) >> 5));
bitvectorinitialize(alias->vec24, n_aliases, 0);
/* mark each member and, one level down, the members of that member */
for (member = alias->parents; member; member = member->nextParent) {
bitvectorsetbit(member->child->index, alias->vec24);
for (member2 = member->child->parents; member2; member2 = member2->nextParent)
bitvectorsetbit(member2->child->index, alias->vec24);
}
}
}
n_gathered_aliases = n_aliases;
}
}
/*
 * Tell whether two aliases may refer to overlapping memory.
 *
 * The pair of alias types is folded into one switch value (a->type * 3 + b->type):
 *   whole/whole      -> same alias
 *   whole/partial    -> same object
 *   partial/partial  -> same object and overlapping ranges
 *   object/set       -> the object alias's index bit is set in the set's vec24
 *   set/set          -> same set, or the two vec24 vectors intersect
 * Any other combination is an internal error.
 *
 * The set cases read vec24, which is filled in by gather_alias_info().
 */
static Boolean may_alias_alias(Alias *a, Alias *b) {
switch ((a->type * 3) + b->type) {
case (AliasType0 * 3) + AliasType0:
return a == b;
case (AliasType0 * 3) + AliasType1:
case (AliasType1 * 3) + AliasType0:
return a->object == b->object;
case (AliasType1 * 3) + AliasType1:
return (a->object == b->object) && aliases_overlap(a, b);
case (AliasType0 * 3) + AliasType2:
case (AliasType1 * 3) + AliasType2:
return bitvectorgetbit(a->index, b->vec24) != 0;
case (AliasType2 * 3) + AliasType0:
case (AliasType2 * 3) + AliasType1:
return bitvectorgetbit(b->index, a->vec24) != 0;
case (AliasType2 * 3) + AliasType2:
return (a == b) || !bitvectorintersectionisempty(a->vec24, b->vec24, n_aliases);
default:
CError_FATAL(1054);
return 1;
}
}
/*
 * Tell whether the memory touched by instructions a and b may overlap.
 * Both instructions must already have an alias assigned.
 */
Boolean may_alias(PCode *a, PCode *b) {
    Alias *first;
    Alias *second;

    first = a->alias;
    second = b->alias;
    return may_alias_alias(first, second);
}
/*
 * Tell whether a and b definitely touch overlapping memory in a precisely
 * known way: both have object aliases (not alias sets), the aliases may
 * overlap, and each alias has exactly the size its instruction loads or
 * stores.
 *
 * Returns 0 when either instruction has no alias. The previous version
 * tested the alias pointers for NULL only after dereferencing them, so the
 * checks could never take effect.
 */
Boolean uniquely_aliases(PCode *a, PCode *b) {
    if (!a->alias || !b->alias)
        return 0;

    if (a->alias->type == AliasType2 || b->alias->type == AliasType2)
        return 0;

    if (!may_alias_alias(a->alias, b->alias))
        return 0;

    return a->alias->size == nbytes_loaded_or_stored_by(a) &&
           b->alias->size == nbytes_loaded_or_stored_by(b);
}
/*
 * Tell whether the memory touched by pcode may overlap anything in the
 * worst_case alias set.
 */
Boolean may_alias_worst_case(PCode *pcode) {
    Alias *mine;

    mine = pcode->alias;
    return may_alias_alias(mine, worst_case);
}
/*
 * Tell whether the memory touched by pcode may overlap any part of `object`.
 * The object's whole-object alias is created if it does not exist yet.
 */
Boolean may_alias_object(PCode *pcode, Object *object) {
    Alias *whole;

    whole = make_alias(object, 0, 0);
    return may_alias_alias(pcode->alias, whole);
}
void initialize_alias_values(void) {
Alias *alias;
for (alias = aliases; alias; alias = alias->next) {
alias->valuenumber = nextvaluenumber++;
alias->valuepcode = NULL;
}
}
/*
 * Record that `pcode` changed the memory described by `alias`: killmemory()
 * is called for the alias itself (with pcode, except for alias sets) and, with
 * a NULL instruction, for every alias related to it.
 *
 * Reading the lists: X->parents holds the memberships in which X is the
 * container (its members are member->child); X->children holds the
 * memberships in which X is the member (its containers are member->parent).
 */
void update_alias_value(Alias *alias, PCode *pcode) {
AliasMember *member;
AliasMember *member2;
AliasMember *member3;
switch (alias->type) {
case AliasType0:
/* whole object: also kill the sets containing it, its partial views, and the sets containing those */
killmemory(alias, pcode);
for (member = alias->children; member; member = member->nextChild) {
CError_ASSERT(1152, member->parent->type == AliasType2);
killmemory(member->parent, NULL);
}
for (member = alias->parents; member; member = member->nextParent) {
CError_ASSERT(1157, member->child->type == AliasType1);
killmemory(member->child, NULL);
for (member2 = member->child->children; member2; member2 = member2->nextChild) {
if (member2->parent != alias) {
CError_ASSERT(1163, member2->parent->type == AliasType2);
killmemory(member2->parent, NULL);
}
}
}
break;
case AliasType1:
/* partial view: also kill every container, plus sibling views of the same object that overlap it */
killmemory(alias, pcode);
for (member = alias->children; member; member = member->nextChild) {
killmemory(member->parent, NULL);
if (member->parent->type == AliasType0) {
for (member2 = member->parent->parents; member2; member2 = member2->nextParent) {
if (member2->child != alias && aliases_overlap(alias, member2->child)) {
killmemory(member2->child, NULL);
}
}
}
}
break;
case AliasType2:
/* alias set: kill the set, every member, the members' other containers,
   the members' own members and those members' other containers */
killmemory(alias, NULL);
for (member = alias->parents; member; member = member->nextParent) {
killmemory(member->child, NULL);
for (member2 = member->child->children; member2; member2 = member2->nextChild) {
if (member2->parent != alias)
killmemory(member2->parent, NULL);
}
for (member3 = member->child->parents; member3; member3 = member3->nextParent) {
killmemory(member3->child, NULL);
for (member2 = member3->child->children; member2; member2 = member2->nextChild) {
if (member2->parent != member->child)
killmemory(member2->parent, NULL);
}
}
}
break;
}
}
void update_all_alias_values(void) {
Alias *alias;
for (alias = aliases; alias; alias = alias->next)
killmemory(alias, NULL);
}

View File

@@ -0,0 +1,906 @@
#include "compiler/CodeMotion.h"
#include "compiler/Alias.h"
#include "compiler/BitVectors.h"
#include "compiler/LoopDetection.h"
#include "compiler/LoopOptimization.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/UseDefChains.h"
#include "compiler/RegisterInfo.h"
/* Set to 1 whenever an instruction has been moved into a loop preheader. */
int movedloopinvariantcode;
/* NOTE(review): presumably set when a loop has been unswitched; the code that writes it lies outside this excerpt. */
int unswitchedinvariantcode;
/*
 * Tell whether pcode computes the same result on every iteration of `loop`.
 *
 * vec is a bit vector of definition ids; a definition only counts against the
 * instruction if its bit is set in vec and its block belongs to the loop.
 * flag1 tolerates operands in the SPR register class, flag2 tolerates writes
 * to real condition-register fields.
 *
 * Returns 1 if the instruction is invariant, 0 otherwise.
 */
static int isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec, int flag1, int flag2) {
PCodeArg *op;
RegUseOrDef *list;
int i;
/* memory instructions: need an object alias, and nothing in the loop may conflict with it */
if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) {
if (pcode->alias) {
if (pcode->alias->type == AliasType2 || (pcode->flags & (fIsVolatile | fSideEffects)))
return 0;
if (pcode->flags & fIsRead) {
/* a load is variant if an aliasing store inside the loop reaches it */
for (list = findobjectusedef(pcode->alias->object)->defs; list; list = list->next) {
if (
may_alias(pcode, Defs[list->id].pcode) &&
bitvectorgetbit(list->id, vec) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
if (pcode->flags & fIsWrite) {
/* a store is variant if any aliasing use sits inside the loop */
for (list = findobjectusedef(pcode->alias->object)->uses; list; list = list->next) {
if (
may_alias(pcode, Uses[list->id].pcode) &&
bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
} else {
return 0;
}
}
/* stores must additionally sit in a block flagged in loop->vec2C */
if ((pcode->flags & fIsWrite) && !bitvectorgetbit(pcode->block->blockIndex, loop->vec2C))
return 0;
op = pcode->args;
i = pcode->argCount;
while (i--) {
switch (op->kind) {
case PCOp_MEMORY:
/*
 * NOTE(review): ((pcode->flags == 0) & 0x40) is always 0, because the
 * comparison yields 0 or 1 and neither has bit 0x40 set. The read check
 * below therefore never runs. It looks like a test of flag bit 0x40 was
 * intended, but the intent cannot be confirmed from here.
 */
if ((pcode->flags & fIsRead) && ((pcode->flags == 0) & 0x40)) {
for (list = findobjectusedef(op->data.mem.obj)->defs; list; list = list->next) {
if (
may_alias(pcode, Defs[list->id].pcode) &&
bitvectorgetbit(list->id, vec) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
if (pcode->flags & fIsWrite) {
for (list = findobjectusedef(op->data.mem.obj)->uses; list; list = list->next) {
if (
may_alias(pcode, Uses[list->id].pcode) &&
bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
break;
case PCOp_REGISTER:
if (op->data.reg.effect & (EffectRead | EffectWrite)) {
/* the frame pointer, caller SP and GPR 2 are exempt from the checks below */
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
if (op->data.reg.reg == _FP_)
break;
if (op->data.reg.reg == _CALLER_SP_)
break;
if (op->data.reg.reg == 2)
break;
}
if (op->data.reg.reg < n_real_registers[op->arg]) {
/* real (non-virtual) register: only allowed in the flag-controlled cases */
if (op->arg == RegClass_CRFIELD) {
if (!flag2 || (op->data.reg.effect & EffectRead))
return 0;
} else if (op->arg == RegClass_SPR) {
if (!flag1)
return 0;
} else {
return 0;
}
} else if (op->data.reg.effect & EffectRead) {
if (flag1 && op->kind == PCOp_REGISTER && op->arg == RegClass_SPR)
break;
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
if (op->data.reg.reg == _FP_)
break;
if (op->data.reg.reg == _CALLER_SP_)
break;
if (op->data.reg.reg == 2)
break;
}
/* virtual register read: variant if any reaching definition lies inside the loop */
for (list = reg_Defs[op->arg][op->data.reg.reg]; list; list = list->next) {
if (
bitvectorgetbit(list->id, vec) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
}
break;
}
op++;
}
return 1;
}
/*
 * Tell whether pcode is the only definition of its result inside `loop`.
 *
 * The instruction must own exactly one entry in Defs. For a register result,
 * no other definition of that register may sit in a loop block; for a memory
 * result, no other aliasing definition of that object may. Any other result
 * kind is an internal error.
 */
static int isuniquedefinition(PCode *pcode, Loop *loop) {
    RegUseOrDef *other;
    UseOrDef *mine;
    int id;

    id = pcode->defID;
    mine = &Defs[id];

    if (id >= number_of_Defs || mine->pcode != pcode)
        return 0;

    /* the instruction defines more than one thing */
    if ((id + 1) < number_of_Defs && mine[1].pcode == pcode)
        return 0;

    switch (mine->v.kind) {
        case PCOp_REGISTER:
            other = reg_Defs[mine->v.arg][mine->v.u.reg];
            while (other) {
                if (
                    bitvectorgetbit(Defs[other->id].pcode->block->blockIndex, loop->memberblocks) &&
                    other->id != id
                )
                    return 0;
                other = other->next;
            }
            break;

        case PCOp_MEMORY:
            other = findobjectusedef(mine->v.u.object)->defs;
            while (other) {
                if (
                    may_alias(pcode, Defs[other->id].pcode) &&
                    bitvectorgetbit(Defs[other->id].pcode->block->blockIndex, loop->memberblocks) &&
                    other->id != id
                )
                    return 0;
                other = other->next;
            }
            break;

        default:
            CError_FATAL(292);
    }

    return 1;
}
/*
 * Tell whether definition defID is the only definition of its value that can
 * reach use useID.
 *
 * Returns 1 when no competing definition reaches the start of the use's
 * block, or when one does but the definition precedes the use within the same
 * block. Returns 0 otherwise.
 *
 * A definition that is neither a register nor a memory value now yields 0
 * (the conservative answer). The previous version fell through and tested
 * `list` without ever having assigned it.
 */
static int uniquelyreachesuse(int defID, int useID) {
    UseOrDef *def;
    UseOrDef *use;
    RegUseOrDef *list;
    PCode *pcode;

    def = &Defs[defID];
    use = &Uses[useID];

    /* look for any other definition of the same value reaching the use's block */
    if (def->v.kind == PCOp_REGISTER) {
        for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) {
            if (
                list->id != defID &&
                bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8)
            )
                break;
        }
    } else if (def->v.kind == PCOp_MEMORY) {
        for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
            if (
                may_alias(def->pcode, Defs[list->id].pcode) &&
                list->id != defID &&
                bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8)
            )
                break;
        }
    } else {
        /* unknown kind of value: cannot prove uniqueness */
        return 0;
    }

    /* list == NULL means the search found no competitor */
    if (!list)
        return 1;

    /* a competitor reaches the block, but the definition may still shadow it locally */
    if (def->pcode->block == use->pcode->block) {
        for (pcode = use->pcode->prevPCode; pcode; pcode = pcode->prevPCode) {
            if (pcode == def->pcode)
                return 1;
        }
    }

    return 0;
}
/*
 * Tell whether definition defID is the sole reaching definition for every use
 * of its value that matters to `loop`.
 *
 * Fails (returns 0) if any relevant use is flagged in the preheader's usevec1C
 * vector, or lies in a loop block without being uniquely reached by defID.
 * For memory values only uses that may alias the defining instruction are
 * considered. Any other value kind is an internal error.
 */
static int uniquelyreachesalluses(int defID, Loop *loop) {
    UseOrDef *def;
    RegUseOrDef *use;

    def = &Defs[defID];

    switch (def->v.kind) {
        case PCOp_REGISTER:
            use = reg_Uses[def->v.arg][def->v.u.reg];
            while (use) {
                if (bitvectorgetbit(use->id, usedefinfo[loop->preheader->blockIndex].usevec1C))
                    return 0;
                if (
                    bitvectorgetbit(Uses[use->id].pcode->block->blockIndex, loop->memberblocks) &&
                    !uniquelyreachesuse(defID, use->id)
                )
                    return 0;
                use = use->next;
            }
            break;

        case PCOp_MEMORY:
            use = findobjectusedef(def->v.u.object)->uses;
            while (use) {
                if (may_alias(def->pcode, Uses[use->id].pcode)) {
                    if (bitvectorgetbit(use->id, usedefinfo[loop->preheader->blockIndex].usevec1C))
                        return 0;
                    if (
                        bitvectorgetbit(Uses[use->id].pcode->block->blockIndex, loop->memberblocks) &&
                        !uniquelyreachesuse(defID, use->id)
                    )
                        return 0;
                }
                use = use->next;
            }
            break;

        default:
            CError_FATAL(382);
    }

    return 1;
}
/*
 * Tell whether value v has a use outside `loop` that is flagged in the
 * preheader's usevec1C vector. Values that are neither registers nor memory
 * objects are reported as not live.
 */
static int isliveonexit(TinyValue *v, Loop *loop) {
    RegUseOrDef *use;
    UInt32 *flagged;

    flagged = usedefinfo[loop->preheader->blockIndex].usevec1C;

    if (v->kind == PCOp_REGISTER)
        use = reg_Uses[v->arg][v->u.reg];
    else if (v->kind == PCOp_MEMORY)
        use = findobjectusedef(v->u.object)->uses;
    else
        return 0;

    while (use) {
        if (
            bitvectorgetbit(use->id, flagged) &&
            !bitvectorgetbit(Uses[use->id].pcode->block->blockIndex, loop->memberblocks)
        )
            return 1;
        use = use->next;
    }

    return 0;
}
/*
 * Return 1 if pcode's block is flagged in loop->vec28, else 0.
 * NOTE(review): going by the function name, vec28 presumably marks the blocks
 * that dominate every loop exit -- confirm against the loop-detection code.
 */
static int dominatesallexits(PCode *pcode, Loop *loop) {
    if (bitvectorgetbit(pcode->block->blockIndex, loop->vec28))
        return 1;
    return 0;
}
/*
 * Decide whether an invariant instruction may be hoisted out of `loop`.
 *
 * It must be the unique definition of its result in the loop, uniquely reach
 * all its uses, and either dominate the loop exits or not be live on exit.
 * For loops with more than 25 instructions extra restrictions apply: LI and
 * the VSPLTIS* splats stay put (except an LI feeding a constant LVX that
 * follows it directly), and other instructions must sit in a block flagged in
 * loop->vec2C.
 */
static int maymove(PCode *pcode, Loop *loop) {
short reg;
if (!isuniquedefinition(pcode, loop))
return 0;
if (!uniquelyreachesalluses(pcode->defID, loop))
return 0;
if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop))
return 0;
if (loop->bodySize > 25) {
switch (pcode->op) {
case PC_LI:
if (
pcode->nextPCode &&
pcode->nextPCode->op == PC_LVX &&
(pcode->nextPCode->flags & fIsConst)
) {
reg = pcode->args[0].data.reg.reg;
if (pcode->nextPCode->args[1].data.reg.reg == reg ||
pcode->nextPCode->args[2].data.reg.reg == reg)
return 1;
}
/* no break: any other LI falls through and is refused like the splats */
case PC_VSPLTISB:
case PC_VSPLTISH:
case PC_VSPLTISW:
return 0;
default:
if (!bitvectorgetbit(pcode->block->blockIndex, loop->vec2C))
return 0;
}
}
return 1;
}
/*
 * Hoist pcode out of `loop`: unlink it, re-insert it before the last
 * instruction of the loop preheader, and patch the reaching-definition
 * vectors of every loop block so that this definition replaces the ones it
 * supersedes.
 *
 * Also decrements loop->bodySize and sets movedloopinvariantcode.
 */
static void moveinvariantcomputation(PCode *pcode, Loop *loop) {
    ObjectUseDef *objinfo;
    BlockList *member;
    RegUseOrDef *other;
    UseOrDef *moved;
    int movedID;

    movedID = pcode->defID;
    moved = &Defs[movedID];

    deletepcode(pcode);
    insertpcodebefore(loop->preheader->lastPCode, pcode);
    loop->bodySize--;
    movedloopinvariantcode = 1;

    switch (moved->v.kind) {
        case PCOp_REGISTER:
            member = loop->blocks;
            while (member) {
                /* every definition of this register is replaced by the hoisted one */
                other = reg_Defs[moved->v.arg][moved->v.u.reg];
                while (other) {
                    bitvectorclearbit(other->id, usedefinfo[member->block->blockIndex].defvec8);
                    other = other->next;
                }
                bitvectorsetbit(movedID, usedefinfo[member->block->blockIndex].defvec8);
                member = member->next;
            }
            break;

        case PCOp_MEMORY:
            objinfo = findobjectusedef(moved->v.u.object);
            member = loop->blocks;
            while (member) {
                /* only definitions that precisely alias the hoisted store are replaced */
                other = objinfo->defs;
                while (other) {
                    if (uniquely_aliases(pcode, Defs[other->id].pcode))
                        bitvectorclearbit(other->id, usedefinfo[member->block->blockIndex].defvec8);
                    other = other->next;
                }
                bitvectorsetbit(movedID, usedefinfo[member->block->blockIndex].defvec8);
                member = member->next;
            }
            break;

        default:
            CError_FATAL(545);
    }
}
/*
 * Variant of maymove() for an SRAWI immediately followed by its ADDZE.
 *
 * pcode is the SRAWI; pcode->nextPCode must exist. The pair may move when the
 * SRAWI owns exactly one GPR definition, the only other definition of that
 * register inside the loop is the ADDZE's, the SRAWI uniquely reaches all its
 * uses, and neither instruction's result is live on exit from a block that
 * does not dominate the exits. A non-GPR result is an internal error.
 */
static int srawi_addze_maymove(PCode *pcode, Loop *loop) {
    RegUseOrDef *other;
    UseOrDef *first;
    int firstID;
    int secondID;

    firstID = pcode->defID;
    secondID = pcode->nextPCode->defID;
    first = &Defs[firstID];

    if (firstID >= number_of_Defs || first->pcode != pcode)
        return 0;
    if ((firstID + 1) < number_of_Defs && first[1].pcode == pcode)
        return 0;

    if (!(first->v.kind == PCOp_REGISTER && first->v.arg == RegClass_GPR)) {
        CError_FATAL(582);
    } else {
        other = reg_Defs[RegClass_GPR][first->v.u.reg];
        while (other) {
            if (
                bitvectorgetbit(Defs[other->id].pcode->block->blockIndex, loop->memberblocks) &&
                other->id != firstID &&
                other->id != secondID
            )
                return 0;
            other = other->next;
        }
    }

    if (!uniquelyreachesalluses(pcode->defID, loop))
        return 0;

    if (!dominatesallexits(pcode, loop)) {
        if (isliveonexit(&Defs[pcode->defID].v, loop))
            return 0;
    }

    if (!dominatesallexits(pcode->nextPCode, loop)) {
        if (isliveonexit(&Defs[pcode->nextPCode->defID].v, loop))
            return 0;
    }

    return 1;
}
/*
 * Recognise a loop-invariant SRAWI/ADDZE pair.
 *
 * Intended to be called on consecutive instructions: when called on a
 * qualifying SRAWI it returns 1 and remembers the following ADDZE in a static
 * variable; the next call returns 1 if it is made on exactly that ADDZE.
 * Every other call clears the remembered instruction and returns 0.
 *
 * A qualifying SRAWI is followed by an ADDZE that uses the SRAWI's destination
 * as both source and destination; neither instruction is a call, pointer
 * operation, volatile or side-effecting; and the SRAWI is invariant and
 * movable. Not reentrant because of the static state.
 */
static int srawi_addze_isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec) {
static PCode *oldNextInstr;
PCode *nextInstr;
nextInstr = pcode->nextPCode;
if (
pcode->op == PC_ADDZE &&
oldNextInstr == pcode
) {
/* second half of a pair accepted on the previous call */
oldNextInstr = NULL;
return 1;
} else if (
pcode->op == PC_SRAWI &&
nextInstr &&
nextInstr->op == PC_ADDZE &&
pcode->args[0].data.reg.reg == nextInstr->args[0].data.reg.reg &&
nextInstr->args[0].data.reg.reg == nextInstr->args[1].data.reg.reg &&
!(pcode->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
!(nextInstr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
isloopinvariant(pcode, loop, vec, 1, 0) &&
srawi_addze_maymove(pcode, loop)
) {
oldNextInstr = nextInstr;
return 1;
} else {
oldNextInstr = NULL;
return 0;
}
}
/*
 * Remove `block` from `loop`: clear its bit in all four per-loop block
 * vectors, subtract its instruction count from the loop body size, and unlink
 * every list entry that refers to it from loop->blocks.
 */
static void removeblockfromloop(Loop *loop, PCodeBlock *block) {
    BlockList **slot;
    BlockList *entry;

    bitvectorclearbit(block->blockIndex, loop->memberblocks);
    bitvectorclearbit(block->blockIndex, loop->vec24);
    bitvectorclearbit(block->blockIndex, loop->vec28);
    bitvectorclearbit(block->blockIndex, loop->vec2C);

    loop->bodySize -= block->pcodeCount;

    for (slot = &loop->blocks; (entry = *slot) != NULL; ) {
        if (entry->block == block) {
            /* unlink; stay on the same slot to examine the successor */
            *slot = entry->next;
        } else {
            slot = &entry->next;
        }
    }
}
/*
 * Redirect the control-flow edge block -> from so that it becomes block -> to.
 *
 * Every successor link of `block` that points at `from` is retargeted, and
 * every predecessor link of `from` that points at `block` is moved to the
 * front of `to`'s predecessor list.
 */
static void changesuccessor(PCodeBlock *block, PCodeBlock *from, PCodeBlock *to) {
    PCLink **slot;
    PCLink *edge;

    edge = block->successors;
    while (edge) {
        if (edge->block == from)
            edge->block = to;
        edge = edge->nextLink;
    }

    for (slot = &from->predecessors; (edge = *slot) != NULL; ) {
        if (edge->block != block) {
            slot = &edge->nextLink;
            continue;
        }
        /* unlink from `from` and push onto `to` */
        *slot = edge->nextLink;
        edge->nextLink = to->predecessors;
        to->predecessors = edge;
    }
}
/*
 * Transfer the control-flow edge from -> block so that it becomes to -> block.
 *
 * Every predecessor link of `block` that points at `from` is retargeted, and
 * every successor link of `from` that points at `block` is moved to the front
 * of `to`'s successor list.
 */
static void movesuccessor(PCodeBlock *to, PCodeBlock *from, PCodeBlock *block) {
    PCLink **slot;
    PCLink *edge;

    edge = block->predecessors;
    while (edge) {
        if (edge->block == from)
            edge->block = to;
        edge = edge->nextLink;
    }

    for (slot = &from->successors; (edge = *slot) != NULL; ) {
        if (edge->block != block) {
            slot = &edge->nextLink;
            continue;
        }
        /* unlink from `from` and push onto `to` */
        *slot = edge->nextLink;
        edge->nextLink = to->successors;
        to->successors = edge;
    }
}
/*
 * Move the instruction pc1 and the branch pc2 out of `block` into the loop's
 * current preheader, then give the loop a brand-new preheader.
 *
 * pc1 goes through moveinvariantcomputation() when its flags include
 * fRecordBit; otherwise it is moved by hand without touching the use/def
 * vectors. The old preheader must end in an unconditional PC_B, which is
 * replaced by pc2, and the edge from `block` to the label in `op` is handed
 * over to the old preheader.
 */
static void movecmptopreheader(Loop *loop, PCodeBlock *block, PCode *pc1, PCode *pc2, PCodeArg *op) {
PCodeBlock *preheader;
PCode *pc3;
/* remember the current preheader; insertpreheaderblock() below installs a new one */
preheader = loop->preheader;
if (PCODE_FLAG_SET_F(pc1) & fRecordBit) {
moveinvariantcomputation(pc1, loop);
} else {
deletepcode(pc1);
insertpcodebefore(loop->preheader->lastPCode, pc1);
loop->bodySize--;
movedloopinvariantcode = 1;
}
loop->preheader = NULL;
insertpreheaderblock(loop);
/* swap the old preheader's trailing branch for pc2 */
pc3 = preheader->lastPCode;
CError_ASSERT(775, pc3->op == PC_B);
deletepcode(pc3);
deletepcode(pc2);
appendpcode(preheader, pc2);
movesuccessor(preheader, block, op->data.label.label->block);
}
/*
 * Insert two new blocks directly after block1 in the block layout and return
 * the first of them.
 *
 *   newblock1: receives a copy of every instruction of block2, becomes the
 *              target of block1's first successor edge, and gets a branch
 *              edge to block3's label.
 *   newblock2: contains only "b <label of block2->nextBlock>".
 *
 * newblock1 is added to `loop` and to all enclosing loops; newblock2 is added
 * to the enclosing loops only. In each loop the new blocks inherit block2's
 * bits in vec28 and vec2C.
 *
 * NOTE(review): the blocks come from lalloc() and only the fields assigned
 * below are initialised. block1->nextBlock is assumed to be non-NULL.
 */
static PCodeBlock *appendheadercopy(Loop *loop, PCodeBlock *block1, PCodeBlock *block2, PCodeBlock *block3) {
PCodeBlock *newblock1;
PCodeBlock *newblock2;
PCLink *link;
PCode *scan;
newblock1 = lalloc(sizeof(PCodeBlock));
newblock2 = lalloc(sizeof(PCodeBlock));
newblock1->labels = NULL;
newblock1->predecessors = newblock1->successors = NULL;
newblock1->firstPCode = newblock1->lastPCode = NULL;
newblock1->pcodeCount = 0;
newblock1->loopWeight = loop->body->loopWeight;
newblock1->flags = 0;
newblock1->blockIndex = pcblockcount++;
newblock2->labels = NULL;
newblock2->predecessors = newblock2->successors = NULL;
newblock2->firstPCode = newblock2->lastPCode = NULL;
newblock2->pcodeCount = 0;
newblock2->loopWeight = loop->body->loopWeight;
newblock2->flags = 0;
newblock2->blockIndex = pcblockcount++;
/* splice block1 -> newblock1 -> newblock2 -> (old block1->nextBlock) into the layout */
newblock1->nextBlock = newblock2;
newblock2->prevBlock = newblock1;
newblock1->prevBlock = block1;
newblock2->nextBlock = block1->nextBlock;
block1->nextBlock = newblock1;
newblock2->nextBlock->prevBlock = newblock2;
pclabel(newblock1, makepclabel());
pclabel(newblock2, makepclabel());
/* block1's first successor edge now leads to newblock1 */
changesuccessor(block1, block1->successors->block, newblock1);
/* edge newblock1 -> newblock2, recorded on both sides */
link = lalloc(sizeof(PCLink));
link->block = newblock2;
link->nextLink = newblock1->successors;
newblock1->successors = link;
link = lalloc(sizeof(PCLink));
link->block = newblock1;
link->nextLink = newblock2->predecessors;
newblock2->predecessors = link;
appendpcode(newblock2, makepcode(PC_B, block2->nextBlock->labels));
pcbranch(newblock2, block2->nextBlock->labels);
pccomputepredecessors1(newblock2);
/* duplicate block2's instructions into newblock1 */
for (scan = block2->firstPCode; scan; scan = scan->nextPCode)
appendpcode(newblock1, copypcode(scan));
pcbranch(newblock1, block3->labels);
link = lalloc(sizeof(PCLink));
link->block = newblock1;
link->nextLink = block3->predecessors;
block3->predecessors = link;
addblocktoloop(loop, newblock1);
if (bitvectorgetbit(block2->blockIndex, loop->vec28))
bitvectorsetbit(newblock1->blockIndex, loop->vec28);
if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
bitvectorsetbit(newblock1->blockIndex, loop->vec2C);
/* enclosing loops get both new blocks */
for (loop = loop->parent; loop; loop = loop->parent) {
addblocktoloop(loop, newblock1);
if (bitvectorgetbit(block2->blockIndex, loop->vec28))
bitvectorsetbit(newblock1->blockIndex, loop->vec28);
if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
bitvectorsetbit(newblock1->blockIndex, loop->vec2C);
addblocktoloop(loop, newblock2);
if (bitvectorgetbit(block2->blockIndex, loop->vec28))
bitvectorsetbit(newblock2->blockIndex, loop->vec28);
if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
bitvectorsetbit(newblock2->blockIndex, loop->vec2C);
}
return newblock1;
}
/*
 * Collect the straight-line chain of blocks leading from `block` to
 * loop->body.
 *
 * Starting at `block`, the first successor is followed repeatedly until
 * loop->body is reached (loop->body itself is not included). Returns the
 * chain as a list allocated with oalloc(), or NULL when
 *   - the chain is empty (block is NULL or already loop->body),
 *   - a block on the way is not a member of the loop,
 *   - a block on the way has more than one successor, or
 *   - a block on the way has no successor at all.
 *
 * The last case used to dereference a NULL successor list when advancing to
 * the next block; it is now reported as "no path".
 */
static BlockList *findswitchpath(Loop *loop, PCodeBlock *block) {
    BlockList *head;
    BlockList *tail;
    BlockList *node;
    PCodeBlock *scan;

    head = NULL;
    tail = NULL;

    for (scan = block; scan && scan != loop->body; scan = scan->successors->block) {
        if (!bitvectorgetbit(scan->blockIndex, loop->memberblocks))
            return NULL;

        /* a dead end can never lead to the loop body */
        if (!scan->successors)
            return NULL;

        /* the path must not fork */
        if (scan->successors->nextLink)
            return NULL;

        node = oalloc(sizeof(BlockList));
        node->block = scan;
        node->next = NULL;

        if (head) {
            tail->next = node;
            tail = node;
        } else {
            head = node;
            tail = node;
        }
    }

    return head;
}
/*
 * Tries to unswitch `loop` on a loop-invariant condition.
 *
 * Preconditions checked up front (the function returns without touching
 * anything when one fails):
 *   - the header (loop->body) ends in PC_BT/PC_BF whose label operand targets
 *     a block inside the loop;
 *   - loop->x57 and loop->x4D are both clear (their meaning is not visible
 *     in this function);
 *   - the block laid out after the header is not a loop member;
 *   - an instruction in the block following the preheader is loop invariant
 *     and writes a condition register field, and the branch ending that
 *     instruction's block tests the same field;
 *   - both the fall-through side and the taken side of that branch are
 *     single-successor chains back to the header (see findswitchpath).
 *
 * When all of that holds, the fall-through chain is merged into one block,
 * a copy of the header is appended for it, the compare is moved to the
 * preheader, and the taken chain is split off into a new sibling Loop that
 * keeps the original header. Sets unswitchedinvariantcode on success.
 */
static void simpleunswitchloop(Loop *loop) {
    PCode *pc29;
    PCodeArg *op27;
    UInt32 *myvec;
    PCodeBlock *block26;
    PCode *pc25; // r25
    BlockList *path2_24;
    PCodeArg *op23;
    PCode *pc23; // r23
    BlockList *scanlist; // r23
    BlockList *bestpath1; // r23
    BlockList *bestpath2; // r22
    PCodeBlock *headercopy; // r22
    Loop *newloop; // r21
    PCodeBlock *preheader21;
    BlockList *path20;
    PCode *scan20;
    PCode *lastpcode;
    int i;
    BlockList *pathiter1;
    BlockList *pathiter2;

    /* The header must end in a conditional branch back into the loop. */
    if (!(lastpcode = loop->body->lastPCode))
        return;
    if (lastpcode->op != PC_BT && lastpcode->op != PC_BF)
        return;
    if (lastpcode->args[2].kind != PCOp_LABEL)
        return;
    if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks))
        return;
    if (loop->x57)
        return;
    if (loop->x4D)
        return;
    if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks))
        return;

    /* block26: the first loop member in block layout order. */
    for (block26 = pcbasicblocks; block26; block26 = block26->nextBlock) {
        if (bitvectorgetbit(block26->blockIndex, loop->memberblocks))
            break;
    }
    if (!block26)
        return;

    /* Working copy of the definition set recorded for block26. */
    myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
    bitvectorcopy(myvec, usedefinfo[block26->blockIndex].defvec8, number_of_Defs);

    /* pc25: first side-effect-free, loop-invariant instruction in the block
       laid out right after the preheader. */
    for (pc25 = loop->preheader->nextBlock->firstPCode; pc25; pc25 = pc25->nextPCode) {
        if (!(PCODE_FLAG_SET_F(pc25) & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects | fRecordBit))) {
            if (isloopinvariant(pc25, loop, myvec, 0, 1))
                break;
        }
    }

    /* It has to define a condition register field... */
    if (
        !pc25 ||
        pc25->argCount < 1 ||
        pc25->args[0].kind != PCOp_REGISTER ||
        pc25->args[0].arg != RegClass_CRFIELD
        )
        return;

    /* ...and the branch closing its block has to test that same field. */
    pc29 = pc25->block->lastPCode;
    if (
        !pc29 ||
        !(pc29->flags & fIsBranch) ||
        pc29->args[0].kind != PCOp_REGISTER ||
        pc29->args[0].arg != RegClass_CRFIELD
        )
        return;
    if (pc29->args[0].data.reg.reg != pc25->args[0].data.reg.reg)
        return;

    /* op27: the (last) label operand of that branch. */
    op27 = NULL;
    for (i = 0; i < pc29->argCount; i++) {
        if (pc29->args[i].kind == PCOp_LABEL)
            op27 = &pc29->args[i];
    }

    if (op27) {
        /* Remember the preheader as it is now; loop->preheader is read again
           further down after movecmptopreheader has run. */
        preheader21 = loop->preheader;

        /* path20: fall-through side, path2_24: taken side. */
        path20 = findswitchpath(loop, block26->nextBlock);
        if (!path20)
            return;
        path2_24 = findswitchpath(loop, op27->data.label.label->block);
        if (!path2_24)
            return;

        /* bestpath1: first block of path20 that also lies on path2_24.
           bestpath2: the last block of path20 in front of it. */
        bestpath1 = NULL;
        bestpath2 = NULL;
        for (pathiter1 = path20; pathiter1; pathiter1 = pathiter1->next) {
            for (pathiter2 = path2_24; pathiter2; pathiter2 = pathiter2->next) {
                if (pathiter1->block == pathiter2->block) {
                    bestpath1 = pathiter1;
                    break;
                }
            }
            if (bestpath1)
                break;
            bestpath2 = pathiter1;
        }

        /* When the very first fall-through block is already on the taken
           path there is no private fall-through block to work with, and
           bestpath2 is still NULL. Nothing has been modified yet, so give up
           instead of dereferencing it in the assertion below. */
        if (!bestpath2)
            return;

        CError_ASSERT(1192, bestpath2->block);

        /* Fold the shared tail into the last private fall-through block. */
        if (bestpath2->block->lastPCode && bestpath2->block->lastPCode->op == PC_B)
            deletepcode(bestpath2->block->lastPCode);

        while (bestpath1) {
            for (scan20 = bestpath1->block->firstPCode; scan20; scan20 = scan20->nextPCode) {
                if (scan20->op != PC_B)
                    appendpcode(bestpath2->block, copypcode(scan20));
            }
            bestpath1 = bestpath1->next;
        }

        headercopy = appendheadercopy(loop, bestpath2->block, loop->body, block26);
        movecmptopreheader(loop, block26, pc25, pc29, op27);

        /* Replicate what is left of block26 at the start of the taken path. */
        if (block26->pcodeCount) {
            if (path2_24->block->firstPCode) {
                /* Inserting every copy before the same anchor keeps the
                   copies in their original order. */
                pc23 = path2_24->block->firstPCode;
                for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) {
                    if (scan20->op != PC_B)
                        insertpcodebefore(pc23, copypcode(scan20));
                }
            } else {
                for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) {
                    if (scan20->op != PC_B)
                        appendpcode(path2_24->block, copypcode(scan20));
                }
            }
        }

        /* Original header now branches to the start of the taken path. */
        op23 = NULL;
        for (i = 0; i < loop->body->lastPCode->argCount; i++) {
            if (loop->body->lastPCode->args[i].kind == PCOp_LABEL)
                op23 = &loop->body->lastPCode->args[i];
        }
        CError_ASSERT(1250, op23 != NULL);
        changesuccessor(loop->body, op23->data.label.label->block, path2_24->block);
        op23->data.label.label = path2_24->block->labels;

        /* The preheader captured earlier branches to the original header. */
        op23 = NULL;
        for (i = 0; i < preheader21->lastPCode->argCount; i++) {
            if (preheader21->lastPCode->args[i].kind == PCOp_LABEL)
                op23 = &preheader21->lastPCode->args[i];
        }
        CError_ASSERT(1267, op23 != NULL);
        changesuccessor(preheader21, op23->data.label.label->block, loop->body);
        op23->data.label.label = loop->body->labels;

        /* The loop's current preheader branches to the header copy. */
        op23 = NULL;
        for (i = 0; i < loop->preheader->lastPCode->argCount; i++) {
            if (loop->preheader->lastPCode->args[i].kind == PCOp_LABEL)
                op23 = &loop->preheader->lastPCode->args[i];
        }
        CError_ASSERT(1284, op23 != NULL);
        changesuccessor(loop->preheader, op23->data.label.label->block, headercopy);
        op23->data.label.label = headercopy->labels;

        /* Create the sibling loop that takes over the original header and
           the taken path. */
        newloop = lalloc(sizeof(Loop));
        newloop->parent = loop->parent;
        newloop->children = NULL;
        newloop->nextSibling = loop->nextSibling;
        loop->nextSibling = newloop;
        newloop->body = loop->body;
        newloop->preheader = NULL;
        newloop->blocks = NULL;
        newloop->basicInductionVars = NULL;
        newloop->footer = NULL;
        newloop->pc18 = NULL;
        newloop->loopWeight = loop->loopWeight;
        bitvectorinitialize(newloop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
        bitvectorinitialize(newloop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
        bitvectorinitialize(newloop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
        bitvectorinitialize(newloop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);

        removeblockfromloop(loop, newloop->body);
        addblocktoloop(newloop, newloop->body);
        bitvectorsetbit(newloop->body->blockIndex, newloop->vec24);
        bitvectorsetbit(newloop->body->blockIndex, newloop->vec2C);
        bitvectorsetbit(newloop->body->blockIndex, newloop->vec28);

        for (scanlist = path2_24; scanlist; scanlist = scanlist->next) {
            removeblockfromloop(loop, scanlist->block);
            addblocktoloop(newloop, scanlist->block);
            bitvectorsetbit(scanlist->block->blockIndex, newloop->vec2C);
        }

        newloop->preheader = NULL;
        insertpreheaderblock(newloop);
        analyzeloop(newloop);

        /* The original loop continues with the header copy. */
        loop->body = headercopy;
        for (scanlist = loop->blocks; scanlist; scanlist = scanlist->next)
            bitvectorsetbit(scanlist->block->blockIndex, loop->vec2C);
        bitvectorsetbit(headercopy->blockIndex, loop->vec24);
        analyzeloop(loop);

        unswitchedinvariantcode = 1;
    }
}
/*
 * Walks a sibling list of loops. Loops with children are only descended
 * into; childless loops are handed to simpleunswitchloop unless their x4F
 * flag is set.
 */
static void simpleunswitchloops(Loop *loop) {
    Loop *cur;

    /* nextSibling is read only after the current loop has been processed, so
       a sibling linked in behind it during processing is visited as well. */
    for (cur = loop; cur != NULL; cur = cur->nextSibling) {
        if (cur->children) {
            simpleunswitchloops(cur->children);
            continue;
        }
        if (!cur->x4F)
            simpleunswitchloop(cur);
    }
}
/*
 * Repeatedly scans every block of `loop` and hands each instruction that
 * qualifies as loop invariant to moveinvariantcomputation. The scan is
 * restarted until a full pass moves nothing.
 *
 * While walking a block, a working copy of the block's definition set
 * (usedefinfo[..].defvec8) is kept up to date: each definition made by the
 * current instruction clears the competing definitions and sets its own bit.
 */
static void moveinvariantsfromloop(Loop *loop) {
    RegUseOrDef *other;
    BlockList *member;
    PCode *cur;
    PCode *following;
    UInt32 *reaching;
    UseOrDef *def;
    int defID;
    int movedany;
    int hoist;
    PCodeBlock *block;

    reaching = oalloc(4 * ((number_of_Defs + 31) >> 5));

    do {
        movedany = 0;

        for (member = loop->blocks; member; member = member->next) {
            block = member->block;
            bitvectorcopy(reaching, usedefinfo[block->blockIndex].defvec8, number_of_Defs);

            cur = block->firstPCode;
            while (cur) {
                /* Fetch the successor first; it is used as the next
                   iteration value after `cur` may have been moved. */
                following = cur->nextPCode;

                if (!(cur->flags & fIsBranch) && cur->argCount) {
                    /* General test first; the srawi/addze test is consulted
                       only when the general one says no. */
                    hoist =
                        !(cur->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
                        isloopinvariant(cur, loop, reaching, 0, 0) &&
                        maymove(cur, loop);
                    if (!hoist)
                        hoist = srawi_addze_isloopinvariant(cur, loop, reaching);

                    if (hoist) {
                        moveinvariantcomputation(cur, loop);
                        movedany = 1;
                    }

                    /* Update the working set with every definition that
                       belongs to this instruction. */
                    defID = cur->defID;
                    def = &Defs[defID];
                    while (defID < number_of_Defs && def->pcode == cur) {
                        if (def->v.kind == PCOp_REGISTER) {
                            for (other = reg_Defs[def->v.arg][def->v.u.reg]; other; other = other->next)
                                bitvectorclearbit(other->id, reaching);
                        } else if (def->v.kind == PCOp_MEMORY) {
                            if (def->v.arg == PCOpMemory0) {
                                for (other = findobjectusedef(def->v.u.object)->defs; other; other = other->next) {
                                    if (uniquely_aliases(cur, Defs[other->id].pcode))
                                        bitvectorclearbit(other->id, reaching);
                                }
                            }
                        } else {
                            CError_FATAL(1434);
                        }

                        bitvectorsetbit(defID, reaching);
                        def++;
                        defID++;
                    }
                }

                cur = following;
            }
        }
    } while (movedany);
}
/*
 * Applies moveinvariantsfromloop to every loop in a sibling list, handling
 * each loop's children before the loop itself.
 */
static void moveinvariantsfromloops(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur != NULL; cur = cur->nextSibling) {
        if (cur->children)
            moveinvariantsfromloops(cur->children);
        moveinvariantsfromloop(cur);
    }
}
void moveloopinvariantcode(void) {
unswitchedinvariantcode = 0;
movedloopinvariantcode = 0;
if (loopsinflowgraph) {
moveinvariantsfromloops(loopsinflowgraph);
simpleunswitchloops(loopsinflowgraph);
}
freeoheap();
}

View File

@@ -0,0 +1,643 @@
#include "compiler/ConstantPropagation.h"
#include "compiler/Alias.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/RegisterInfo.h"
#include "compiler/StackFrame.h"
#include "compiler/UseDefChains.h"
#include "compiler/objects.h"
// Cleared at the start of propagateconstants() and set to 1 by every rewrite
// performed in propagateconstantstoblock().
int propagatedconstants;
// Per-sweep flag: propagateconstants() clears it before each sweep over the
// blocks and repeats the sweep while a rewrite has set it.
static int changed;
// Indexed by GPR virtual register number: the instruction currently taken to
// define that register, or NULL when none/more than one definition applies.
static PCode **defininginstruction;
// Same as defininginstruction, for vector (RegClass_VR) registers.
static PCode **vrdefininginstruction;
static void computedefininginstructions(PCodeBlock *block) {
RegUseOrDef *list;
PCode *instr;
int i;
for (i = 0; i < used_virtual_registers[RegClass_GPR]; i++) {
instr = NULL;
for (list = reg_Defs[RegClass_GPR][i]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) {
if (instr == NULL) {
instr = Defs[list->id].pcode;
} else {
instr = NULL;
break;
}
}
}
defininginstruction[i] = instr;
}
for (i = 0; i < used_virtual_registers[RegClass_VR]; i++) {
instr = NULL;
for (list = reg_Defs[RegClass_VR][i]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) {
if (instr == NULL) {
instr = Defs[list->id].pcode;
} else {
instr = NULL;
break;
}
}
}
vrdefininginstruction[i] = instr;
}
}
/*
 * Checks whether the register in `op` is defined by an ADDI that forms the
 * address of a local (DLOCAL) object off _FP_ or _CALLER_SP_, and whether
 * `addend` may be added to that local's displacement.
 *
 * On success stores the ADDI's memory offset in *resultValue and returns the
 * ADDI; otherwise returns NULL and leaves *resultValue untouched.
 */
static PCode *isstackoperand(PCodeArg *op, SInt16 *resultValue, SInt16 addend) {
    PCode *def;

    def = defininginstruction[op->data.reg.reg];
    if (!def || def->op != PC_ADDI)
        return NULL;

    if (def->args[2].kind != PCOp_MEMORY)
        return NULL;
    if (def->args[1].data.reg.reg != _FP_ && def->args[1].data.reg.reg != _CALLER_SP_)
        return NULL;
    if (def->args[2].data.mem.obj->datatype != DLOCAL)
        return NULL;

    if (!can_add_displ_to_local(def->args[2].data.mem.obj, addend))
        return NULL;

    *resultValue = def->args[2].data.mem.offset;
    return def;
}
/*
 * Returns 1 and stores the immediate in *resultValue when the register in
 * `op` is defined by an LI with an immediate operand; returns 0 otherwise
 * (leaving *resultValue untouched).
 */
static int isconstantoperand(PCodeArg *op, SInt16 *resultValue) {
    PCode *def;

    def = defininginstruction[op->data.reg.reg];
    if (!def)
        return 0;
    if (def->op != PC_LI)
        return 0;
    if (def->args[1].kind != PCOp_IMMEDIATE)
        return 0;

    *resultValue = def->args[1].data.imm.value;
    return 1;
}
/*
 * Like isconstantoperand, but additionally requires the LI immediate to
 * satisfy FITS_IN_USHORT. Returns 1 and stores the value in *resultValue on
 * success, 0 otherwise.
 */
static int isuint16constantoperand(PCodeArg *op, SInt16 *resultValue) {
    PCode *def;

    def = defininginstruction[op->data.reg.reg];
    if (!def)
        return 0;
    if (def->op != PC_LI)
        return 0;
    if (def->args[1].kind != PCOp_IMMEDIATE)
        return 0;
    if (!FITS_IN_USHORT(def->args[1].data.imm.value))
        return 0;

    *resultValue = def->args[1].data.imm.value;
    return 1;
}
/*
 * Returns 1 when the vector register in `op` is defined by VSPLTISB,
 * VSPLTISH or VSPLTISW with an immediate operand; the immediate goes to
 * *resultValue and the defining opcode to *resultNewOp. Returns 0 otherwise
 * without writing either output.
 */
static int isvectorconstantoperand(PCodeArg *op, SInt16 *resultValue, Opcode *resultNewOp) {
    PCode *def;

    def = vrdefininginstruction[op->data.reg.reg];
    if (!def)
        return 0;
    if (def->op != PC_VSPLTISB && def->op != PC_VSPLTISH && def->op != PC_VSPLTISW)
        return 0;
    if (def->args[1].kind != PCOp_IMMEDIATE)
        return 0;

    *resultValue = def->args[1].data.imm.value;
    *resultNewOp = def->op;
    return 1;
}
/*
 * Classifies the definition of the register in `op`:
 *   2 - an opcode in the PC_LHZ..PC_LHZUX range, or an RLWINM whose mask
 *       operands (args[3], args[4]) are 16 and 31;
 *   1 - an opcode in the PC_LBZ..PC_LBZUX range, or an RLWINM whose mask
 *       operands are 24 and 31;
 *   0 - anything else, including no known definition.
 */
static int isunsignedloadoperand(PCodeArg *op) {
    PCode *def;
    int maskfirst;
    int masklast;

    def = defininginstruction[op->data.reg.reg];
    if (!def)
        return 0;

    if (def->flags & fIsRead) {
        if (def->op >= PC_LHZ && def->op <= PC_LHZUX)
            return 2;
        if (def->op >= PC_LBZ && def->op <= PC_LBZUX)
            return 1;
        return 0;
    }

    if (def->op != PC_RLWINM)
        return 0;

    maskfirst = def->args[3].data.imm.value;
    masklast = def->args[4].data.imm.value;
    if (masklast != 31)
        return 0;
    if (maskfirst == 24)
        return 1;
    if (maskfirst == 16)
        return 2;
    return 0;
}
/*
 * When the register in `op` is defined by an RLWINM, computes the bit mask
 * described by that instruction's args[3] (first mask bit) and args[4] (last
 * mask bit), stores it in *resultMask and returns 1. Returns 0 otherwise.
 *
 * With first <= last the mask is the contiguous run first..last; otherwise
 * it is the wrapped-around union of the two ends.
 */
static int ismaskedoperand(PCodeArg *op, UInt32 *resultMask) {
    PCode *def;
    UInt32 fromfirst;
    UInt32 pastlast;

    def = defininginstruction[op->data.reg.reg];
    if (!def || def->op != PC_RLWINM)
        return 0;

    /* Bits from the first mask bit down to bit 31. */
    fromfirst = (def->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> def->args[3].data.imm.value);
    /* Bits strictly after the last mask bit. */
    pastlast = ((def->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (def->args[4].data.imm.value + 1));

    if (def->args[3].data.imm.value <= def->args[4].data.imm.value)
        *resultMask = fromfirst & ~pastlast;
    else
        *resultMask = fromfirst | ~pastlast;

    return 1;
}
/*
 * Classifies the definition of the register in `op`:
 *   2 - a read instruction with an opcode in PC_LHA..PC_LHAUX, or EXTSH;
 *   1 - EXTSB;
 *   0 - anything else, including no known definition.
 * The EXTSB/EXTSH tests only apply to instructions without the fIsRead flag.
 */
static int issignedloadoperand(PCodeArg *op) {
    PCode *def;

    def = defininginstruction[op->data.reg.reg];
    if (!def)
        return 0;

    if (def->flags & fIsRead)
        return (def->op >= PC_LHA && def->op <= PC_LHAUX) ? 2 : 0;

    if (def->op == PC_EXTSB)
        return 1;
    if (def->op == PC_EXTSH)
        return 2;
    return 0;
}
/*
 * Rewrites the instructions of `block` in place, using what is recorded about
 * the instruction defining each register operand (defininginstruction[] for
 * GPRs, vrdefininginstruction[] for vector registers).
 *
 * Every rewrite sets both `propagatedconstants` and the file-level `changed`
 * flag. After an instruction has been examined, the GPR/VR registers it
 * writes are recorded as defined by it, so later instructions in the same
 * block see the updated definition.
 */
static void propagateconstantstoblock(PCodeBlock *block) {
PCode *instr;
SInt16 immAddend;
SInt16 value1;
SInt16 valueU16;
Opcode newOpcode;
SInt16 value2;
UInt32 mask;
UInt32 mask2;
int loadSize;
PCodeArg *op;
int i;
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
switch (instr->op) {
// Register copy of a constant: becomes a direct LI of that constant.
case PC_MR:
if (isconstantoperand(&instr->args[1], &value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
}
break;
// Vector copy of a splat-immediate: becomes the same splat opcode.
case PC_VMR:
if (isvectorconstantoperand(&instr->args[1], &value1, &newOpcode)) {
change_opcode(instr, newOpcode);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
}
break;
// Only handled without the record bit, with args[2] == 0 and args[4] == 31
// (args[3]/args[4] are the mask bounds; args[2] is presumably the rotate
// count - confirm against the RLWINM operand layout).
case PC_RLWINM:
if (
!(PCODE_FLAG_SET_F(instr) & fRecordBit) &&
instr->args[2].data.imm.value == 0 &&
instr->args[4].data.imm.value == 31
)
{
// Masking a constant that the mask leaves unchanged: load it directly.
if (isconstantoperand(&instr->args[1], &value1)) {
if (
(instr->args[3].data.imm.value == 16 && value1 == (value1 & 0x7FFF)) ||
(instr->args[3].data.imm.value == 24 && value1 == (value1 & 0xFF))
)
{
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
}
// Source already classified as a 1- or 2-byte unsigned value and the mask
// keeps at least that many low bits: the mask is a plain copy.
loadSize = isunsignedloadoperand(&instr->args[1]);
if (
(loadSize == 2 && instr->args[3].data.imm.value <= 16) ||
(loadSize == 1 && instr->args[3].data.imm.value <= 24)
)
{
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
// Source was produced by another RLWINM: compute this instruction's mask the
// same way ismaskedoperand does and drop it if it keeps every bit the
// earlier mask could leave set.
if (ismaskedoperand(&instr->args[1], &mask)) {
if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) {
mask2 =
((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) &
~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
} else {
mask2 =
((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) |
~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
}
if (mask == (mask & mask2)) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
}
}
break;
// EXTSH of an LI constant becomes LI; EXTSH of a value classified by
// issignedloadoperand (result 1 or 2) becomes a copy.
case PC_EXTSH:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (isconstantoperand(&instr->args[1], &value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
loadSize = issignedloadoperand(&instr->args[1]);
if (loadSize == 1 || loadSize == 2) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
break;
// EXTSB of a constant in -128..127 becomes LI; EXTSB of a value classified
// as size 1 by issignedloadoperand becomes a copy.
case PC_EXTSB:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (
isconstantoperand(&instr->args[1], &value1) &&
value1 >= -128 &&
value1 <= 127
)
{
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
loadSize = issignedloadoperand(&instr->args[1]);
if (loadSize == 1) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
break;
// constant + immediate folds into a single LI when the sum passes
// FITS_IN_SHORT.
case PC_ADDI:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
immAddend = instr->args[2].data.imm.value;
if (
isconstantoperand(&instr->args[1], &value1) &&
FITS_IN_SHORT(immAddend + value1)
)
{
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = immAddend + value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
break;
case PC_ADD:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
// Second source constant: x + 0 becomes MR, otherwise ADDI with that
// constant. immAddend remembers the constant for the step below.
if (isconstantoperand(&instr->args[2], &value1)) {
if (value1 == 0) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
} else {
change_opcode(instr, PC_ADDI);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
}
propagatedconstants = 1;
changed = 1;
immAddend = value1;
}
if (isconstantoperand(&instr->args[1], &value1)) {
// The opcode is ADDI/MR here only if the step above rewrote it, i.e. both
// sources are constants (and immAddend was set): fold to LI when the sum
// passes FITS_IN_SHORT.
if (instr->op == PC_ADDI || instr->op == PC_MR) {
if (FITS_IN_SHORT(immAddend + value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = immAddend + value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
} else {
// Only the first source is constant: move the other source into args[1]
// and use the constant as the immediate (or drop it when it is 0).
instr->args[1] = instr->args[2];
if (value1 == 0) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
} else {
change_opcode(instr, PC_ADDI);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
}
propagatedconstants = 1;
changed = 1;
}
}
// NOTE(review): `changed` is the file-level flag, so this is also entered
// when an earlier instruction of the current sweep was rewritten; in that
// case instr->op is still PC_ADD and neither branch below matches.
if (changed) {
if (instr->op == PC_MR) {
// Copy of a stack address: re-materialize the address computation here.
PCode *stackInstr;
if ((stackInstr = isstackoperand(&instr->args[1], &value1, 0))) {
change_opcode(instr, PC_ADDI);
instr->flags = stackInstr->flags;
instr->args[1] = stackInstr->args[1];
instr->args[2] = stackInstr->args[2];
change_num_operands(instr, 3);
propagatedconstants = 1;
changed = 1;
}
} else if (instr->op == PC_ADDI && instr->args[2].kind == PCOp_IMMEDIATE) {
// Stack address plus immediate: take over the defining ADDI's operands and
// add the immediate to its offset.
PCode *stackInstr;
SInt16 addend = instr->args[2].data.imm.value;
if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) {
change_opcode(instr, PC_ADDI);
instr->flags = stackInstr->flags;
instr->args[1] = stackInstr->args[1];
instr->args[2] = stackInstr->args[2];
instr->args[2].data.imm.value = value1 + addend;
if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000))
instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, 1);
propagatedconstants = 1;
changed = 1;
}
}
}
break;
case PC_OR:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
value1 = 0;
immAddend = 0;
// Second source constant: ORI when it passes the uint16 test and is
// non-zero, MR when it is zero. immAddend keeps the constant for the step
// below.
if (isconstantoperand(&instr->args[2], &value1)) {
if (isuint16constantoperand(&instr->args[2], &valueU16)) {
if (valueU16 != 0) {
change_opcode(instr, PC_ORI);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = valueU16;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
} else {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
value1 = valueU16;
} else if (value1 == 0) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
immAddend = value1;
}
if (isconstantoperand(&instr->args[1], &value1)) {
// Already rewritten above, so both sources are constants: fold to LI.
if (instr->op == PC_ORI || instr->op == PC_MR) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = immAddend | value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
} else if (isuint16constantoperand(&instr->args[1], &valueU16)) {
// Only the first source is constant: swap the other source into args[1].
if (valueU16 != 0) {
change_opcode(instr, PC_ORI);
instr->args[1] = instr->args[2];
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = valueU16;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
} else {
change_opcode(instr, PC_MR);
instr->args[1] = instr->args[2];
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
} else if (value1 == 0) {
change_opcode(instr, PC_MR);
instr->args[1] = instr->args[2];
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
}
break;
// The folded value below is value2 - value1, i.e. args[2] minus args[1].
case PC_SUBF:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (isconstantoperand(&instr->args[1], &value1) && FITS_IN_SHORT(-value1)) {
// Subtrahend constant: fold to LI when both are constant, MR for x - 0,
// otherwise ADDI with the negated constant.
if (isconstantoperand(&instr->args[2], &value2) && FITS_IN_SHORT(value2 - value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value2 - value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
} else if (value1 == 0) {
change_opcode(instr, PC_MR);
instr->args[1] = instr->args[2];
change_num_operands(instr, 2);
} else {
change_opcode(instr, PC_ADDI);
instr->args[1] = instr->args[2];
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = -value1;
instr->args[2].data.imm.obj = NULL;
}
propagatedconstants = 1;
changed = 1;
value2 = value1;
} else if (isconstantoperand(&instr->args[2], &value1) && FITS_IN_SHORT(-value1)) {
// Minuend constant: 0 - x becomes NEG, otherwise SUBFIC. SUBFIC gets an
// extra written operand, SPR register 0 (presumably the carry register -
// confirm against the SUBFIC operand layout).
if (value1 == 0) {
change_opcode(instr, PC_NEG);
change_num_operands(instr, 2);
} else {
instr->flags = opcodeinfo[PC_SUBFIC].flags | (instr->flags & ~opcodeinfo[PC_SUBF].flags);
change_opcode(instr, PC_SUBFIC);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
instr->args[3].kind = PCOp_REGISTER;
instr->args[3].arg = RegClass_SPR;
instr->args[3].data.reg.reg = 0;
instr->args[3].data.reg.effect = EffectWrite;
change_num_operands(instr, 4);
}
propagatedconstants = 1;
changed = 1;
}
break;
// Displacement-form loads/stores whose base register holds a stack address:
// address the local directly and refresh the alias information.
case PC_LBZ:
case PC_LHZ:
case PC_LHA:
case PC_LWZ:
case PC_STB:
case PC_STH:
case PC_STW:
case PC_LFS:
case PC_LFD:
case PC_STFS:
case PC_STFD:
if (instr->args[2].kind == PCOp_IMMEDIATE) {
PCode *stackInstr;
SInt16 addend = instr->args[2].data.imm.value;
if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) {
instr->args[1] = stackInstr->args[1];
instr->args[2] = stackInstr->args[2];
instr->args[2].data.imm.value = value1 + addend;
if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000))
instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value,
nbytes_loaded_or_stored_by(instr));
propagatedconstants = 1;
changed = 1;
}
}
break;
// Indexed loads/stores with one constant address register: switch to the
// opcode two entries earlier (presumably the matching displacement form -
// this relies on the Opcode numbering; confirm against the enum) and use
// the constant as the immediate operand.
case PC_LBZX:
case PC_LHZX:
case PC_LHAX:
case PC_LWZX:
case PC_STBX:
case PC_STHX:
case PC_STWX:
case PC_LFSX:
case PC_LFDX:
case PC_STFSX:
case PC_STFDX:
if (isconstantoperand(&instr->args[2], &value1)) {
instr->op -= 2;
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
} else if (isconstantoperand(&instr->args[1], &value1)) {
instr->op -= 2;
instr->args[1] = instr->args[2];
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
}
break;
}
// Record this (possibly rewritten) instruction as the definition of every
// GPR / vector register it writes.
for (i = 0, op = instr->args; i < instr->argCount; i++, op++) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite)
)
{
defininginstruction[op->data.reg.reg] = instr;
}
else if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_VR &&
(op->data.reg.effect & EffectWrite)
)
{
vrdefininginstruction[op->data.reg.reg] = instr;
}
}
}
}
/*
 * Entry point of the constant propagation pass. Builds the use/def chains,
 * allocates the two per-register definition tables, then sweeps all blocks
 * in depth-first order until a sweep performs no rewrite. Releases the oheap
 * when done. `propagatedconstants` reports whether anything was rewritten.
 */
void propagateconstants(void) {
    PCodeBlock *block;
    int i;

    propagatedconstants = 0;
    computeusedefchains(0);

    defininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_GPR]);
    vrdefininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_VR]);

    for (;;) {
        changed = 0;

        for (i = 0; i < pcblockcount; i++) {
            block = depthfirstordering[i];
            if (!block)
                continue;
            computedefininginstructions(block);
            propagateconstantstoblock(block);
        }

        if (!changed)
            break;
    }

    freeoheap();
}

View File

@@ -0,0 +1,885 @@
#include "compiler/LoopDetection.h"
#include "compiler/CFunc.h"
#include "compiler/PCode.h"
#include "compiler/TOC.h"
#include "compiler/UseDefChains.h"
#include "compiler/CompilerTools.h"
#include "compiler/BitVectors.h"
#include "compiler/enode.h"
#include "compiler/objects.h"
// Head of the top-level loop list; findnaturalloops() inserts every detected
// loop through addlooptolist(), which nests loops via parent/children.
Loop *loopsinflowgraph;
// Bit count used for the per-loop block bit vectors; set by
// findnaturalloops() to pcblockcount + 5 * nloopheaders.
int loopdetection_nblocks;
// dominators[b] is a bit vector over block indices, filled in by
// computedominators().
static UInt32 **dominators;
// List of loop header blocks built by findloopheaders(); consumed (advanced
// to NULL) by findnaturalloops().
static BlockList *loopheaders;
// Number of entries added to loopheaders.
static int nloopheaders;
// Work stack of blocks used by findnaturalloop().
static PCodeBlock **loopstack;
// NOTE(review): not referenced in the visible part of this file.
BitVector *LoopTemp;
// NOTE(review): not referenced in the visible part of this file.
struct LoopList *LoopList_First;
static void computedominators(void) {
int i;
PCodeBlock *block;
int blockCount;
int flag;
UInt32 *myvec;
PCLink *link;
blockCount = pcblockcount;
flag = 1;
dominators = oalloc(sizeof(UInt32 *) * pcblockcount);
for (i = 0; i < pcblockcount; i++)
dominators[i] = oalloc(4 * ((blockCount + 31) >> 5));
myvec = oalloc(4 * ((blockCount + 31) >> 5));
bitvectorinitialize(dominators[pcbasicblocks->blockIndex], blockCount, 0);
//dominators[pcbasicblocks->blockIndex][0] |= 1;
bitvectorsetbit(0, dominators[pcbasicblocks->blockIndex]);
for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock)
bitvectorinitialize(dominators[block->blockIndex], blockCount, 0xFFFFFFFF);
computedepthfirstordering();
while (flag) {
flag = 0;
for (i = 0; i < pcblockcount; i++) {
block = depthfirstordering[i];
if (block && block->blockIndex != pcbasicblocks->blockIndex) {
bitvectorcopy(myvec, dominators[block->predecessors->block->blockIndex], blockCount);
for (link = block->predecessors->nextLink; link; link = link->nextLink)
bitvectorintersect(myvec, dominators[link->block->blockIndex], blockCount);
//myvec[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31);
bitvectorsetbit(block->blockIndex, myvec);
if (bitvectorchanged(dominators[block->blockIndex], myvec, blockCount))
flag = 1;
}
}
}
}
/*
 * Rebuilds `loopheaders`/`nloopheaders`: a block (other than the first one)
 * is a loop header when at least one of its predecessors has the block in
 * its dominator set. Headers are pushed to the front of the list, which is
 * also the return value.
 */
static BlockList *findloopheaders(void) {
    PCodeBlock *block;
    PCLink *pred;
    BlockList *entry;

    nloopheaders = 0;
    loopheaders = NULL;

    for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
        /* Look for a predecessor that this block dominates. */
        pred = block->predecessors;
        while (pred && !bitvectorgetbit(block->blockIndex, dominators[pred->block->blockIndex]))
            pred = pred->nextLink;

        if (!pred)
            continue;

        entry = oalloc(sizeof(BlockList));
        entry->block = block;
        entry->next = loopheaders;
        loopheaders = entry;
        nloopheaders++;
    }

    return loopheaders;
}
/*
 * Makes `block` a member of `loop`: sets its bit in loop->memberblocks and
 * pushes a new list node (from lalloc) onto the front of loop->blocks.
 * No check is made for the block already being a member.
 */
void addblocktoloop(Loop *loop, PCodeBlock *block) {
    BlockList *entry;

    entry = lalloc(sizeof(BlockList));
    bitvectorsetbit(block->blockIndex, loop->memberblocks);

    entry->next = loop->blocks;
    entry->block = block;
    loop->blocks = entry;
}
/*
 * Fills in the membership of the natural loop headed by loop->body and
 * derives three per-loop bit vectors from it:
 *   vec24 - members that have a successor outside the loop;
 *   vec28 - members contained in the dominator set of every vec24 block;
 *   vec2C - members contained in the dominator set of every in-loop
 *           predecessor of the header.
 */
static void findnaturalloop(Loop *loop) {
    BlockList *member;
    BlockList *exitscan;
    PCLink *edge;
    PCodeBlock *block;
    int depth;

    depth = 0;
    addblocktoloop(loop, loop->body);

    /* Seed the work stack with the sources of the back edges. */
    for (edge = loop->body->predecessors; edge; edge = edge->nextLink) {
        if (bitvectorgetbit(loop->body->blockIndex, dominators[edge->block->blockIndex]) && edge->block != loop->body) {
            addblocktoloop(loop, edge->block);
            loopstack[depth++] = edge->block;
        }
    }

    /* Pull in everything that reaches those blocks. */
    while (depth > 0) {
        depth--;
        for (edge = loopstack[depth]->predecessors; edge; edge = edge->nextLink) {
            if (!bitvectorgetbit(edge->block->blockIndex, loop->memberblocks)) {
                addblocktoloop(loop, edge->block);
                loopstack[depth++] = edge->block;
            }
        }
    }

    /* vec24: blocks with an edge leaving the loop. */
    for (member = loop->blocks; member; member = member->next) {
        block = member->block;
        for (edge = block->successors; edge; edge = edge->nextLink) {
            if (!bitvectorgetbit(edge->block->blockIndex, loop->memberblocks)) {
                bitvectorsetbit(block->blockIndex, loop->vec24);
                break;
            }
        }
    }

    /* vec28: blocks found in the dominator set of every vec24 block. */
    for (member = loop->blocks; member; member = member->next) {
        exitscan = loop->blocks;
        while (exitscan) {
            if (bitvectorgetbit(exitscan->block->blockIndex, loop->vec24) &&
                !bitvectorgetbit(member->block->blockIndex, dominators[exitscan->block->blockIndex]))
                break;
            exitscan = exitscan->next;
        }
        if (!exitscan)
            bitvectorsetbit(member->block->blockIndex, loop->vec28);
    }

    /* vec2C: blocks found in the dominator set of every in-loop predecessor
       of the header. */
    for (member = loop->blocks; member; member = member->next) {
        edge = loop->body->predecessors;
        while (edge) {
            if (bitvectorgetbit(edge->block->blockIndex, loop->memberblocks) &&
                !bitvectorgetbit(member->block->blockIndex, dominators[edge->block->blockIndex]))
                break;
            edge = edge->nextLink;
        }
        if (!edge)
            bitvectorsetbit(member->block->blockIndex, loop->vec2C);
    }
}
/*
 * Inserts `loop` into the loop forest rooted at *list.
 *
 * If the header of `loop` lies inside a loop already on the list, `loop` is
 * inserted among that loop's children instead. Any listed loop whose header
 * lies inside `loop` is unlinked and re-parented under `loop`. Otherwise
 * `loop` ends up at the front of the list.
 */
static void addlooptolist(Loop *loop, Loop **list) {
    Loop **slot;
    Loop *other;

    slot = list;
    for (other = *slot; other; other = *slot) {
        /* `loop` is nested inside `other`: descend. */
        if (bitvectorgetbit(loop->body->blockIndex, other->memberblocks)) {
            loop->parent = other;
            addlooptolist(loop, &other->children);
            return;
        }

        /* Unrelated loop: step over it. */
        if (!bitvectorgetbit(other->body->blockIndex, loop->memberblocks)) {
            slot = &other->nextSibling;
            continue;
        }

        /* `other` is nested inside `loop`: unlink it and adopt it. The slot
           now holds the following sibling, so it is not advanced. */
        *slot = other->nextSibling;
        other->parent = loop;
        other->nextSibling = loop->children;
        loop->children = other;
    }

    loop->nextSibling = *list;
    *list = loop;
}
/*
 * Creates one Loop per entry of `loopheaders`, computes its membership with
 * findnaturalloop and files it into `loopsinflowgraph`. The header list is
 * consumed in the process (`loopheaders` is NULL afterwards).
 *
 * The bit vectors are sized for loopdetection_nblocks, which is larger than
 * the current block count by 5 per loop header.
 */
static void findnaturalloops(void) {
    Loop *loop;
    int size;

    loopdetection_nblocks = pcblockcount + 5 * nloopheaders;
    loopstack = oalloc(sizeof(PCodeBlock *) * pcblockcount);

    /* Byte size of one bit vector. */
    size = 4 * ((loopdetection_nblocks + 31) >> 5);

    for (; loopheaders; loopheaders = loopheaders->next) {
        loop = lalloc(sizeof(Loop));

        loop->children = NULL;
        loop->nextSibling = NULL;
        loop->parent = NULL;
        loop->body = loopheaders->block;
        loop->preheader = NULL;
        loop->blocks = NULL;
        loop->basicInductionVars = NULL;
        loop->footer = NULL;
        loop->pc18 = NULL;
        loop->loopWeight = loop->body->loopWeight;

        loop->memberblocks = lalloc(size);
        bitvectorinitialize(loop->memberblocks, loopdetection_nblocks, 0);
        loop->vec24 = lalloc(size);
        bitvectorinitialize(loop->vec24, loopdetection_nblocks, 0);
        loop->vec28 = lalloc(size);
        bitvectorinitialize(loop->vec28, loopdetection_nblocks, 0);
        loop->vec2C = lalloc(size);
        bitvectorinitialize(loop->vec2C, loopdetection_nblocks, 0);

        findnaturalloop(loop);
        addlooptolist(loop, &loopsinflowgraph);
    }
}
/*
 * Allocates an empty, unlinked block with a fresh label attached and the
 * next free block index (pcblockcount is incremented). The caller is
 * responsible for linking it into the block list.
 */
static PCodeBlock *makepreheaderblock(void) {
    PCodeLabel *freshlabel;
    PCodeBlock *fresh;

    freshlabel = makepclabel();
    fresh = lalloc(sizeof(PCodeBlock));

    fresh->prevBlock = NULL;
    fresh->nextBlock = NULL;
    fresh->labels = NULL;
    fresh->predecessors = NULL;
    fresh->successors = NULL;
    fresh->firstPCode = NULL;
    fresh->lastPCode = NULL;
    fresh->pcodeCount = 0;
    fresh->flags = 0;
    fresh->blockIndex = pcblockcount;
    pcblockcount++;

    pclabel(fresh, freshlabel);
    return fresh;
}
/*
 * Links block `a` into the block list directly in front of block `b`.
 * `b` must already have a predecessor in the list (b->prevBlock is
 * dereferenced unconditionally).
 */
static void insertpreheaderbefore(PCodeBlock *a, PCodeBlock *b) {
    PCodeBlock *before;

    before = b->prevBlock;

    a->nextBlock = b;
    a->prevBlock = before;
    before->nextBlock = a;
    b->prevBlock = a;
}
/*
 * Build a preheader block for `loop` and make every edge that enters the
 * loop header (loop->body) from a block outside the loop go through it.
 *
 * Steps, in order:
 *   1. create the preheader, ending in an unconditional branch to the header;
 *   2. for each header predecessor that is not a loop member, retarget its
 *      terminating branch (or switch table / code label) to the preheader and
 *      move its PCLink from the header's predecessor list to the preheader's;
 *   3. link the preheader into the block chain;
 *   4. add the preheader <-> header PCLinks;
 *   5. register the new block(s) with every enclosing loop.
 *
 * The new block is stored in loop->preheader.
 */
void insertpreheaderblock(Loop *loop) {
PCodeBlock *preheader;
PCodeBlock *block29;
PCodeBlock *block28;
PCode *pcode27;
PCLink *link; // r26
PCLink **linkptr; // r25
PCodeLabel *newlabel; // r23
PCLink *innerlink;
PCodeBlock *block;
PCodeArg *arg;
int i;
preheader = loop->preheader = makepreheaderblock();
/* block29 stays NULL unless the extra self-loop landing block is needed below */
block29 = NULL;
/* block28 is the loop header */
block28 = loop->body;
if (!block28->labels)
pclabel(block28, makepclabel());
appendpcode(preheader, makepcode(PC_B, block28->labels));
/* the preheader runs as often as the enclosing loop body, or once at top level */
preheader->loopWeight = loop->parent ? loop->parent->loopWeight : 1;
/* walk the header's predecessor list; linkptr lets us unlink entries in place */
linkptr = &block28->predecessors;
while ((link = *linkptr)) {
if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
/* back edge from inside the loop: leave it alone */
linkptr = &link->nextLink;
} else {
/* entry edge from outside: retarget whatever transfers control to the header */
if (link->block->pcodeCount) {
pcode27 = link->block->lastPCode;
if (pcode27->op == PC_B) {
CError_ASSERT(462, pcode27->args[0].kind == PCOp_LABEL);
if (pcode27->args[0].data.label.label->block == block28)
pcode27->args[0].data.label.label = preheader->labels;
} else if (pcode27->op == PC_BT || pcode27->op == PC_BF) {
CError_ASSERT(474, pcode27->args[2].kind == PCOp_LABEL);
if (pcode27->args[2].data.label.label->block == block28)
pcode27->args[2].data.label.label = preheader->labels;
} else if (pcode27->op == PC_BCTR) {
if (pcode27->argCount > 1 && pcode27->args[1].kind == PCOp_MEMORY) {
/* indirect branch through a switch table: patch every table entry that points at the header */
Object *obj = pcode27->args[1].data.mem.obj;
UInt32 *array = (UInt32 *) obj->u.data.u.switchtable.data;
int i;
for (i = 0; i < obj->u.data.u.switchtable.size; i++) {
if (((PCodeLabel *) CTool_ResolveIndexToPointer(array[i]))->block == block28)
array[i] = CTool_CreateIndexFromPointer(preheader->labels);
}
} else {
/* indirect branch without a table operand: patch every entry of codelabellist that points at the header */
CodeLabelList *cll;
for (cll = codelabellist; cll; cll = cll->next) {
if (cll->label->pclabel->block == block28)
cll->label->pclabel = preheader->labels;
}
}
} else {
/* no recognised branch: the predecessor must reach the header by falling through */
CError_ASSERT(505, link->block->nextBlock == block28);
}
}
/* the outside block now flows into the preheader instead of the header */
for (innerlink = link->block->successors; innerlink; innerlink = innerlink->nextLink) {
if (innerlink->block == block28)
innerlink->block = preheader;
}
/* move this PCLink from the header's predecessor list to the preheader's */
*linkptr = link->nextLink;;
link->nextLink = preheader->predecessors;
preheader->predecessors = link;
}
}
if (!bitvectorgetbit(block28->prevBlock->blockIndex, loop->memberblocks)) {
/* the block physically before the header is outside the loop: put the preheader right there */
insertpreheaderbefore(preheader, block28);
/*
 * If the header ends in a branch (other than BDNZ) whose label targets the
 * header itself, and the block after the header is not a loop member, add an
 * empty block (block29) between preheader and header and point the
 * self-branch at it.
 */
if (
(!block28->nextBlock || !bitvectorgetbit(block28->nextBlock->blockIndex, loop->memberblocks)) &&
block28->lastPCode &&
(block28->lastPCode->flags & fIsBranch) &&
block28->lastPCode->op != PC_BDNZ
) {
/* find the first label operand of the terminating branch */
i = block28->lastPCode->argCount;
arg = block28->lastPCode->args;
while (i && arg->kind != PCOp_LABEL) {
arg++;
i--;
}
if (i && arg->kind == PCOp_LABEL && arg->data.label.label->block == block28) {
block29 = makepreheaderblock();
insertpreheaderbefore(block29, block28);
newlabel = makepclabel();
pclabel(block29, newlabel);
arg->data.label.label = newlabel;
/* block29 has the header as both its only predecessor and its only successor */
link = lalloc(sizeof(PCLink));
link->block = block28;
link->nextLink = block29->predecessors;
block29->predecessors = link;
link = lalloc(sizeof(PCLink));
link->block = block28;
link->nextLink = block29->successors;
block29->successors = link;
/* the header's self edge now runs through block29 */
for (link = block28->successors; link; link = link->nextLink) {
if (link->block == block28)
link->block = block29;
}
for (link = block28->predecessors; link; link = link->nextLink) {
if (link->block == block28)
link->block = block29;
}
bitvectorsetbit(block29->blockIndex, loop->vec2C);
addblocktoloop(loop, block29);
}
}
} else {
/* the header's physical predecessor is itself in the loop: place the preheader before the first member block in chain order */
for (block = pcbasicblocks; block; block = block->nextBlock) {
if (bitvectorgetbit(block->blockIndex, loop->memberblocks))
break;
}
insertpreheaderbefore(preheader, block);
}
/* flow-graph edge preheader -> header */
link = lalloc(sizeof(PCLink));
link->block = preheader;
link->nextLink = block28->predecessors;
block28->predecessors = link;
link = lalloc(sizeof(PCLink));
link->block = block28;
link->nextLink = preheader->successors;
preheader->successors = link;
/* every enclosing loop gains the new block(s); they inherit the header's vec28/vec2C membership */
for (loop = loop->parent; loop; loop = loop->parent) {
addblocktoloop(loop, preheader);
if (bitvectorgetbit(block28->blockIndex, loop->vec28)) {
bitvectorsetbit(preheader->blockIndex, loop->vec28);
if (block29)
bitvectorsetbit(block29->blockIndex, loop->vec28);
}
if (bitvectorgetbit(block28->blockIndex, loop->vec2C)) {
bitvectorsetbit(preheader->blockIndex, loop->vec2C);
if (block29)
bitvectorsetbit(block29->blockIndex, loop->vec2C);
}
}
}
/*
 * Insert a preheader for every loop in the sibling list starting at `loop`.
 * Each loop's children are processed before the loop itself.
 */
static void insertpreheaderblocks(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur; cur = cur->nextSibling) {
        if (cur->children)
            insertpreheaderblocks(cur->children);
        insertpreheaderblock(cur);
    }
}
/*
 * Entry point of loop detection: clear loopsinflowgraph, compute dominators
 * and, if any loop header is found, build the natural loops and give each one
 * a preheader. The o-heap is released before returning on every path.
 */
void findloopsinflowgraph(void) {
    loopsinflowgraph = NULL;
    computedominators();

    if (!findloopheaders()) {
        freeoheap();
        return;
    }

    findnaturalloops();
    insertpreheaderblocks(loopsinflowgraph);
    freeoheap();
}
/*
 * Compute the trip count of a loop with signed constant bounds.
 *
 *   opcode     PC_BT or anything else (the callers pass PC_BT / PC_BF)
 *   condition  value taken from the branch's second operand (0, 1 or other)
 *   c, d       the two constant bounds being compared
 *   addend     the per-iteration step
 *   result     receives the trip count; written only when 1 is returned
 *
 * Returns 0 when the step has the wrong sign for the comparison, when the
 * condition is not supported for PC_BT, or when an equality-style exit would
 * not be hit exactly; returns 1 otherwise.
 */
static int checklooplimits(SInt32 opcode, SInt32 condition, SInt32 c, SInt32 d, SInt32 addend, SInt32 *result) {
    SInt32 count;

    if (opcode == PC_BT) {
        switch (condition) {
            case 0:
                if (addend <= 0)
                    return 0;
                count = (c < d) ? (d - c + addend - 1) / addend : 0;
                break;
            case 1:
                if (addend >= 0)
                    return 0;
                count = (c > d) ? (c - d - addend - 1) / -addend : 0;
                break;
            default:
                return 0;
        }
    } else {
        switch (condition) {
            case 0:
                if (addend >= 0)
                    return 0;
                count = (c >= d) ? (c - d - addend) / -addend : 0;
                break;
            case 1:
                if (addend <= 0)
                    return 0;
                count = (c <= d) ? (d - c + addend) / addend : 0;
                break;
            default:
                /* remaining conditions: the distance must be an exact multiple of the step */
                if (c < d) {
                    if (addend <= 0)
                        return 0;
                    if ((d - c) % addend)
                        return 0;
                    count = (d - c) / addend;
                } else if (c > d) {
                    if (addend >= 0)
                        return 0;
                    if ((c - d) % -addend)
                        return 0;
                    count = (c - d) / -addend;
                } else {
                    count = 0;
                }
                break;
        }
    }

    *result = count;
    return 1;
}
/*
 * Unsigned counterpart of checklooplimits(): same case analysis, but the
 * bounds `c` and `d` are UInt32 and the arithmetic is carried out on the
 * mixed UInt32/SInt32 expressions exactly as written below.
 *
 * In addition to the failure cases of the signed version, this returns 0
 * when the computed trip count has bit 31 set. Unlike the signed version,
 * *result has already been written in that case.
 */
static int checkunsignedlooplimits(SInt32 opcode, SInt32 condition, UInt32 c, UInt32 d, SInt32 addend, UInt32 *result) {
    UInt32 count;

    if (opcode == PC_BT) {
        switch (condition) {
            case 0:
                if (addend <= 0)
                    return 0;
                if (c < d)
                    count = (d - c + addend - 1) / addend;
                else
                    count = 0;
                break;
            case 1:
                if (addend >= 0)
                    return 0;
                if (c > d)
                    count = (c - d - addend - 1) / -addend;
                else
                    count = 0;
                break;
            default:
                return 0;
        }
    } else {
        switch (condition) {
            case 0:
                if (addend >= 0)
                    return 0;
                if (c >= d)
                    count = (c - d - addend) / -addend;
                else
                    count = 0;
                break;
            case 1:
                if (addend <= 0)
                    return 0;
                if (c <= d)
                    count = (d - c + addend) / addend;
                else
                    count = 0;
                break;
            default:
                /* remaining conditions: the distance must be an exact multiple of the step */
                if (c < d) {
                    if (addend <= 0)
                        return 0;
                    if ((d - c) % addend)
                        return 0;
                    count = (d - c) / addend;
                } else if (c > d) {
                    if (addend >= 0)
                        return 0;
                    if ((c - d) % -addend)
                        return 0;
                    count = (c - d) / -addend;
                } else {
                    count = 0;
                }
                break;
        }
    }

    *result = count;
    return (*result & 0x80000000) == 0;
}
/*
 * For a counting loop whose bounds are not both constant, pick the
 * comparison operator that describes "keep iterating".
 *
 *   a   branch opcode (PC_BT or anything else)
 *   b   condition value from the branch (0, 1 or other)
 *   c   loop step
 *   op  receives ELESS / EGREATER / EGREATEREQU / ELESSEQU / ENOTEQU
 *
 * Returns 1 and stores the operator when the combination is supported,
 * 0 (leaving *op untouched) otherwise.
 */
static int checkunknownloop(int a, int b, int c, unsigned char *op) {
    if (a == PC_BT) {
        if (b == 0 && c > 0) {
            *op = ELESS;
            return 1;
        }
        if (b == 1 && c < 0) {
            *op = EGREATER;
            return 1;
        }
        return 0;
    }

    if (b == 0) {
        if (c >= 0)
            return 0;
        *op = EGREATEREQU;
        return 1;
    }
    if (b == 1) {
        if (c <= 0)
            return 0;
        *op = ELESSEQU;
        return 1;
    }

    /* any other condition is only accepted for a unit step */
    if (c == 1 || c == -1) {
        *op = ENOTEQU;
        return 1;
    }
    return 0;
}
/*
 * Try to recognise `loop` as a counting loop controlled by a compare followed
 * by a conditional branch (PC_BT / PC_BF) at the end of its header block.
 *
 * On success the function fills in loop->step, loop->pc18 (the single in-loop
 * ADDI that steps the counter), loop->lower / lowerType, loop->upper /
 * upperType and then sets either
 *   - isKnownCountingLoop plus iterationCount (both bounds constant), or
 *   - isUnknownCountingLoop plus unknownCondition.
 * It returns early, without setting either flag, as soon as the code does not
 * match the expected shape. Fields already written at that point are left
 * as they are.
 */
static void checkcountingloop(Loop *loop) {
RegUseOrDef *list;
PCode *lastpcode;
PCode *prevpcode;
PCode *pc8;
PCode *check;
short op12;
short reg11;
SInt16 reg4;
short reg11b;
Loop *child;
/* the header must end in BT/BF whose label operand targets a block inside the loop */
if (!(lastpcode = loop->body->lastPCode))
return;
if (lastpcode->op != PC_BT && lastpcode->op != PC_BF)
return;
if (lastpcode->args[2].kind != PCOp_LABEL)
return;
if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks))
return;
/* ...and the fall-through block must leave the loop */
/* NOTE(review): nextBlock is dereferenced without a NULL check here, whereas insertpreheaderblock() tests `!block28->nextBlock` first - confirm a successor block always exists */
if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks))
return;
/* reg11: register tested by the branch (args[0]); reg4: the branch's args[1] immediate */
reg11 = lastpcode->args[0].data.reg.reg;
reg4 = lastpcode->args[1].data.imm.value;
prevpcode = lastpcode->prevPCode;
if (!prevpcode)
return;
op12 = prevpcode->op;
/* optional "addi r, r, imm" between the compare and the branch: step over it and take its immediate as the step */
if (op12 == PC_ADDI && prevpcode->args[2].kind == PCOp_IMMEDIATE) {
pc8 = prevpcode;
prevpcode = prevpcode->prevPCode;
if (!prevpcode)
return;
op12 = prevpcode->op;
if (pc8->args[0].data.reg.reg != pc8->args[1].data.reg.reg)
return;
if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI)
return;
if (prevpcode->args[1].data.reg.reg != pc8->args[0].data.reg.reg)
return;
if ((loop->step = pc8->args[2].data.imm.value) == 0)
return;
}
/* prevpcode must now be the compare that writes the register the branch tests */
if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI)
return;
if (prevpcode->args[0].data.reg.reg != reg11)
return;
/* reg11b: the counter register being compared; register numbers below 32 are rejected */
reg11b = prevpcode->args[1].data.reg.reg;
if (reg11b < 32)
return;
/* the branch target must be the block placed directly after the preheader */
if (loop->preheader->nextBlock != lastpcode->args[2].data.label.label->block)
return;
/* ---- upper bound ---- */
if (op12 == PC_CMPI) {
/* immediate compare: must be the first instruction of the header */
if (prevpcode->prevPCode)
return;
loop->upper = prevpcode->args[2].data.imm.value;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (op12 == PC_CMPLI) {
if (prevpcode->prevPCode)
return;
/* logical compare: only the low 16 bits of the immediate are used */
loop->upper = prevpcode->args[2].data.imm.value & 0xFFFF;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (op12 == PC_CMP || op12 == PC_CMPL) {
if (prevpcode->prevPCode) {
/* the bound register is loaded inside the header: accept LI, LIS, or LIS+ADDI as the only preceding code */
if (
prevpcode->prevPCode->op == PC_LI &&
prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
!prevpcode->prevPCode->prevPCode
) {
loop->upper = prevpcode->prevPCode->args[1].data.imm.value;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
prevpcode->prevPCode->op == PC_LIS &&
prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
!prevpcode->prevPCode->prevPCode
) {
loop->upper = prevpcode->prevPCode->args[1].data.imm.value << 16;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
prevpcode->prevPCode->op == PC_ADDI &&
prevpcode->prevPCode->args[2].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
prevpcode->prevPCode->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg &&
prevpcode->prevPCode->prevPCode &&
prevpcode->prevPCode->prevPCode->op == PC_LIS &&
prevpcode->prevPCode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
!prevpcode->prevPCode->prevPCode->prevPCode
) {
loop->upper = prevpcode->prevPCode->args[2].data.imm.value +
(prevpcode->prevPCode->prevPCode->args[1].data.imm.value << 16);
loop->upperType = LOOP_BOUND_CONSTANT;
} else {
return;
}
} else {
/* the compare is the first instruction: the bound register must not be defined anywhere in the loop */
pc8 = NULL;
for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks))
return;
}
/* look at the definitions flagged in the preheader's defvec8: a single LI / LIS / LIS+ADDI gives a constant bound, anything else a variable one */
for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) {
if (!pc8) {
pc8 = Defs[list->id].pcode;
if (
pc8->op == PC_LI &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->upper = pc8->args[1].data.imm.value;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_LIS &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->upper = pc8->args[1].data.imm.value << 16;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_ADDI &&
pc8->args[2].kind == PCOp_IMMEDIATE &&
pc8->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg &&
pc8->prevPCode &&
pc8->prevPCode->op == PC_LIS &&
pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
pc8->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg
) {
loop->upper = pc8->args[2].data.imm.value +
(pc8->prevPCode->args[1].data.imm.value << 16);
loop->upperType = LOOP_BOUND_CONSTANT;
} else {
loop->upperType = LOOP_BOUND_VARIABLE;
break;
}
} else {
/* a second flagged definition: the bound is not a single constant */
loop->upperType = LOOP_BOUND_VARIABLE;
break;
}
}
}
if (loop->upperType == LOOP_BOUND_INDETERMINATE)
loop->upperType = LOOP_BOUND_VARIABLE;
}
}
/* ---- step: the counter must have exactly one definition inside the loop, of the form "addi r, r, imm" with imm != 0 ---- */
pc8 = NULL;
for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) {
check = Defs[list->id].pcode;
if (bitvectorgetbit(check->block->blockIndex, loop->memberblocks)) {
if (!pc8) {
pc8 = check;
if (check->op != PC_ADDI)
return;
if (check->args[1].data.reg.reg != reg11b)
return;
if (check->args[2].kind != PCOp_IMMEDIATE)
return;
if ((loop->step = check->args[2].data.imm.value) == 0)
return;
} else {
return;
}
}
}
if (!pc8)
return;
/* the increment must be in the compare's block, or the compare's block must be flagged in loop->vec2C */
if (pc8->block != prevpcode->block && !bitvectorgetbit(prevpcode->block->blockIndex, loop->vec2C))
return;
/* the increment must not sit inside a nested loop */
if (loop->children) {
for (child = loop->children; child; child = child->nextSibling) {
if (bitvectorgetbit(pc8->block->blockIndex, child->memberblocks))
return;
}
}
loop->pc18 = pc8;
/* ---- lower bound: same LI / LIS / LIS+ADDI recognition, applied to the counter's definitions flagged in the preheader's defvec8 ---- */
pc8 = NULL;
for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) {
if (!pc8) {
pc8 = Defs[list->id].pcode;
if (
pc8->op == PC_LI &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->lower = pc8->args[1].data.imm.value;
loop->lowerType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_LIS &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->lower = pc8->args[1].data.imm.value << 16;
loop->lowerType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_ADDI &&
pc8->args[2].kind == PCOp_IMMEDIATE &&
pc8->args[1].data.reg.reg == reg11b &&
pc8->prevPCode &&
pc8->prevPCode->op == PC_LIS &&
pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
pc8->prevPCode->args[0].data.reg.reg == reg11b
) {
loop->lower = pc8->args[2].data.imm.value +
(pc8->prevPCode->args[1].data.imm.value << 16);
loop->lowerType = LOOP_BOUND_CONSTANT;
} else {
loop->lowerType = LOOP_BOUND_VARIABLE;
break;
}
} else {
/* second flagged definition; the INDETERMINATE set here is turned into VARIABLE just below */
loop->lowerType = LOOP_BOUND_INDETERMINATE;
break;
}
}
}
if (loop->lowerType == LOOP_BOUND_INDETERMINATE)
loop->lowerType = LOOP_BOUND_VARIABLE;
/* ---- classify ---- */
if (loop->lowerType == LOOP_BOUND_CONSTANT && loop->upperType == LOOP_BOUND_CONSTANT) {
/* signed compares use the signed trip-count routine, logical compares the unsigned one */
if (op12 == PC_CMP || op12 == PC_CMPI) {
if (!checklooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, &loop->iterationCount))
return;
} else {
if (!checkunsignedlooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, (UInt32 *) &loop->iterationCount))
return;
}
loop->isKnownCountingLoop = 1;
} else if (loop->lowerType != LOOP_BOUND_INDETERMINATE || loop->upperType != LOOP_BOUND_INDETERMINATE) {
if (!checkunknownloop(lastpcode->op, reg4, loop->step, &loop->unknownCondition))
return;
loop->isUnknownCountingLoop = 1;
}
}
/*
 * Run checkcountingloop() on every loop in the sibling list starting at
 * `loop`, visiting each loop's children first. A NULL list is a no-op.
 */
void analyzeForCountableLoops(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur; cur = cur->nextSibling) {
        if (cur->children)
            analyzeForCountableLoops(cur->children);
        checkcountingloop(cur);
    }
}
/*
 * Reset the per-loop analysis fields of `loop` and recompute the ones that
 * can be derived from a single pass over its blocks:
 *
 *   bodySize  total pcodeCount of all member blocks
 *   x4D       some instruction has fLink in PCODE_FLAG_SET_T
 *   x4E       some instruction is BCTRL/BCTR/BCCTR/MTCTR/MFCTR
 *   x4F       cleared when a non-header block does not have exactly one
 *             successor and one predecessor
 *   x52       some block has fPCBlockFlag4000 set
 *   x53/x54   an indexed load / indexed store was seen (these two are only
 *             ever set here, never cleared)
 *   x57       an EIEIO/SYNC/ISYNC was seen
 *
 * Blocks of a loop without children are tagged with fPCBlockFlag2000.
 */
void analyzeloop(Loop *loop) {
    BlockList *entry;
    PCodeBlock *blk;
    PCode *pc;
    int leaf;

    loop->x52 = 0;
    loop->x57 = 0;
    loop->iterationCount = -1;
    loop->upperType = LOOP_BOUND_INDETERMINATE;
    loop->lowerType = LOOP_BOUND_INDETERMINATE;
    loop->isUnknownCountingLoop = 0;
    loop->isKnownCountingLoop = 0;
    loop->x4F = 1;
    loop->x4E = 0;
    loop->x4D = 0;
    loop->bodySize = 0;

    leaf = !loop->children;

    for (entry = loop->blocks; entry; entry = entry->next) {
        blk = entry->block;

        if (leaf)
            blk->flags |= fPCBlockFlag2000;
        loop->bodySize += blk->pcodeCount;

        if (
            blk != loop->body &&
            (!blk->successors || !blk->predecessors ||
             blk->successors->nextLink || blk->predecessors->nextLink)
        )
            loop->x4F = 0;

        if ((blk->flags & fPCBlockFlag4000) == fPCBlockFlag4000)
            loop->x52 = 1;

        for (pc = blk->firstPCode; pc; pc = pc->nextPCode) {
            if (PCODE_FLAG_SET_T(pc) & fLink)
                loop->x4D = 1;

            /* CTR users take priority over the read/write classification */
            switch (pc->op) {
                case PC_BCTRL:
                case PC_BCTR:
                case PC_BCCTR:
                case PC_MTCTR:
                case PC_MFCTR:
                    loop->x4E = 1;
                    continue;
                default:
                    break;
            }

            if (pc->flags & fIsRead) {
                switch (pc->op) {
                    case PC_LBZX:
                    case PC_LHZX:
                    case PC_LHAX:
                    case PC_LWZX:
                    case PC_LFSX:
                    case PC_LFDX:
                        loop->x53 = 1;
                        break;
                    default:
                        break;
                }
            } else if (pc->flags & fIsWrite) {
                switch (pc->op) {
                    case PC_STBX:
                    case PC_STHX:
                    case PC_STWX:
                    case PC_STFSX:
                    case PC_STFDX:
                        loop->x54 = 1;
                        break;
                    default:
                        break;
                }
            } else {
                switch (pc->op) {
                    case PC_EIEIO:
                    case PC_SYNC:
                    case PC_ISYNC:
                        loop->x57 = 1;
                        break;
                    default:
                        break;
                }
            }
        }
    }

    /* small leaf loops without fLink instructions get the flag (again) on every block */
    if (leaf && !loop->x4D && loop->bodySize < 32) {
        for (entry = loop->blocks; entry; entry = entry->next)
            entry->block->flags |= fPCBlockFlag2000;
    }
}
/*
 * Run analyzeloop() on every loop in the sibling list starting at `loop`,
 * children before parents.
 */
static void analyzeloops(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur; cur = cur->nextSibling) {
        if (cur->children)
            analyzeloops(cur->children);
        analyzeloop(cur);
    }
}
/* Analyze every loop recorded in loopsinflowgraph; does nothing when there are none. */
void analyzeloopsinflowgraph(void) {
    if (!loopsinflowgraph)
        return;

    analyzeloops(loopsinflowgraph);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,751 @@
#include "compiler/StrengthReduction.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/LoopDetection.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/Registers.h"
#include "compiler/UseDefChains.h"
/* Cleared at the start of strengthreduceloops(); set to 1 when a pass in this file rewrites code. */
int strengthreducedloops;
/*
 * Find the instruction that sets up GPR `reg` before `loop`.
 *
 * A candidate is a definition of `reg` that lies outside the loop and whose
 * id is set in the loop header's defvec8 bit vector. The result is that
 * definition when it is unique and is an LI, ADDI or ADD; otherwise NULL.
 */
static PCode *findinitializer(Loop *loop, short reg) {
    UInt32 *defvec = usedefinfo[loop->body->blockIndex].defvec8;
    PCode *candidate = NULL;
    PCode *defpcode;
    RegUseOrDef *def;

    for (def = reg_Defs[RegClass_GPR][reg]; def; def = def->next) {
        defpcode = Defs[def->id].pcode;

        if (bitvectorgetbit(defpcode->block->blockIndex, loop->memberblocks))
            continue;
        if (!bitvectorgetbit(def->id, defvec))
            continue;

        /* more than one outside definition: no single initializer */
        if (candidate)
            return NULL;
        candidate = defpcode;
    }

    if (!candidate)
        return NULL;

    switch (candidate->op) {
        case PC_LI:
        case PC_ADDI:
        case PC_ADD:
            return candidate;
        default:
            return NULL;
    }
}
/*
 * Check whether GPR `reg` behaves as a basic induction variable of `loop`
 * with the given `step`: every definition of `reg` inside the loop must be
 * "addi reg, reg, step" with an immediate third operand.
 *
 * Returns 1 when all in-loop definitions match (including the case of no
 * in-loop definition at all), 0 otherwise.
 */
static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) {
    RegUseOrDef *list;
    PCode *instr;

    for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
        instr = Defs[list->id].pcode;
        if (!bitvectorgetbit(instr->block->blockIndex, loop->memberblocks))
            continue;

        if (instr->op != PC_ADDI)
            return 0;
        if (instr->args[1].data.reg.reg != reg)
            return 0;
        /*
         * ADDI can also carry a PCOp_MEMORY third operand (calc_biv_init
         * handles that form); data.imm.value is only meaningful for an
         * immediate, so anything else is not a constant-step increment.
         */
        if (instr->args[2].kind != PCOp_IMMEDIATE)
            return 0;
        if (instr->args[2].data.imm.value != step)
            return 0;
    }

    return 1;
}
/*
 * Record GPR `reg` as a basic induction variable of `loop` with the given
 * step, unless it is already recorded. The new entry is pushed on the front
 * of loop->basicInductionVars. It collects every in-loop definition of `reg`
 * in instrsC and its pre-loop initializer (possibly NULL) in `initializer`.
 */
static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) {
    BasicInductionVar *biv;
    RegUseOrDef *def;
    InstrList *node;
    PCode *defpcode;

    biv = loop->basicInductionVars;
    while (biv) {
        if (biv->reg == reg)
            return;
        biv = biv->next;
    }

    biv = oalloc(sizeof(BasicInductionVar));
    biv->next = loop->basicInductionVars;
    loop->basicInductionVars = biv;

    biv->loop = loop;
    biv->reg = reg;
    biv->step = step;
    biv->inductionVars = NULL;
    biv->instrsC = NULL;

    for (def = reg_Defs[RegClass_GPR][reg]; def; def = def->next) {
        defpcode = Defs[def->id].pcode;
        if (!bitvectorgetbit(defpcode->block->blockIndex, loop->memberblocks))
            continue;

        node = oalloc(sizeof(InstrList));
        node->instr = defpcode;
        node->next = biv->instrsC;
        biv->instrsC = node;
    }

    biv->initializer = findinitializer(loop, reg);
}
/*
 * Scan every instruction of `loop` for "addi r, r, imm" with r >= 32 and
 * register r as a basic induction variable when all of its in-loop
 * definitions have that same form and step.
 */
static void findbasicinductionvariables(Loop *loop) {
    BlockList *entry;
    PCode *pc;
    short reg;
    SInt16 step;

    for (entry = loop->blocks; entry; entry = entry->next) {
        for (pc = entry->block->firstPCode; pc; pc = pc->nextPCode) {
            if (pc->op != PC_ADDI)
                continue;

            reg = pc->args[0].data.reg.reg;
            if (reg < 32)
                continue;
            if (pc->args[1].data.reg.reg != reg)
                continue;

            /* the step is deliberately held in a 16-bit variable, as in the original */
            step = pc->args[2].data.imm.value;
            if (isbasicinductionvariable(loop, reg, step))
                addbasicinductionvariable(loop, reg, step);
        }
    }
}
/*
 * Collect basic induction variables for every loop in the sibling list
 * starting at `loop`, children before parents.
 */
static void findallbasicinductionvariables(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur; cur = cur->nextSibling) {
        if (cur->children)
            findallbasicinductionvariables(cur->children);
        findbasicinductionvariables(cur);
    }
}
/*
 * Decide whether the instruction behind Uses[useID] derives a value from the
 * basic induction variable `biv` in a way that can be strength-reduced.
 *
 * Outputs (always initialised; only meaningful when 1 is returned):
 *   *result1  multiplier applied to the biv: the MULLI immediate, 1 << shift
 *             for the accepted RLWINM form, or 1 for indexed loads/stores
 *   *result2  for indexed loads/stores: index (1 or 2) of the operand holding
 *             biv->reg; 0 otherwise
 *   *result3  for indexed loads/stores: index of the other address operand;
 *             0 otherwise
 *   *result4  for indexed loads/stores: the outermost loop, walking outward
 *             from biv->loop, in which the other operand still has no
 *             definition and which does not contain the biv's initializer;
 *             NULL otherwise
 *
 * Returns 1 when the instruction qualifies, 0 when it does not.
 */
static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) {
RegUseOrDef *list;
int counter;
Loop *loop;
Loop *scanloop;
PCode *instr;
instr = Uses[useID].pcode;
*result2 = 0;
*result3 = 0;
*result4 = NULL;
switch (instr->op) {
case PC_MULLI:
*result1 = instr->args[2].data.imm.value;
break;
case PC_RLWINM:
/* accept only args[3] == 0, args[2] <= 15 and args[4] == 31 - args[2], without the record bit (presumably the plain shift-left form - confirm operand order) */
if (instr->args[3].data.imm.value)
return 0;
if (instr->args[2].data.imm.value > 15)
return 0;
if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value))
return 0;
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
return 0;
*result1 = 1 << instr->args[2].data.imm.value;
break;
case PC_LBZX:
case PC_LHZX:
case PC_LHAX:
case PC_LWZX:
case PC_STBX:
case PC_STHX:
case PC_STWX:
case PC_LFSX:
case PC_LFDX:
case PC_STFSX:
case PC_STFDX:
*result2 = 0;
*result3 = 0;
/* work out which address operand is the biv and which is the other one */
if (instr->args[1].data.reg.reg == biv->reg) {
*result2 = 1;
*result3 = 2;
} else if (instr->args[2].data.reg.reg == biv->reg) {
*result2 = 2;
*result3 = 1;
}
/* the other operand (args[0] when neither address operand is the biv) must have no definition inside the biv's loop */
counter = 0;
for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks))
counter++;
}
if (counter)
return 0;
/* climb through enclosing loops while the other operand stays undefined there and the biv's initializer exists and lies outside */
loop = biv->loop;
for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) {
counter = 0;
for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks))
counter++;
}
if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks))
counter++;
if (counter)
break;
loop = scanloop;
}
*result4 = loop;
*result1 = 1;
return 1;
default:
return 0;
}
/* MULLI / RLWINM: the destination register must have exactly one definition inside the biv's loop */
counter = 0;
for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks))
counter++;
}
return counter == 1;
}
/*
 * Create an InductionVar for `instr`, derived from `biv`, and push it on the
 * front of biv->inductionVars.
 *
 *   val1  multiplier (stored in step)
 *   val2  operand index stored in x18
 *   val3  operand index stored in x1A
 *   val4  loop stored in someloop
 *
 * x1C starts as the instruction's destination register, or -1 for loads and
 * stores; x1E (the replacement register) starts as -1; instrC starts NULL.
 */
static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) {
    InductionVar *iv = oalloc(sizeof(InductionVar));

    iv->next = biv->inductionVars;
    biv->inductionVars = iv;

    iv->basicVar = biv;
    iv->instr = instr;
    iv->instrC = NULL;
    iv->someloop = val4;
    iv->x1A = val3;
    iv->x18 = val2;
    iv->step = val1;
    iv->x1E = -1;

    if (instr->flags & (fIsRead | fIsWrite)) {
        iv->x1C = -1;
        return;
    }
    iv->x1C = instr->args[0].data.reg.reg;
}
/*
 * For every basic induction variable of `loop`, look at each use of its
 * register inside the loop and record the ones accepted by
 * isinductionvariable() as derived induction variables.
 */
static void findnonbasicinductionvariables(Loop *loop) {
    BasicInductionVar *biv;
    RegUseOrDef *use;
    PCode *user;
    SInt32 multiplier;
    short bivIndex;
    short otherIndex;
    Loop *outer;

    for (biv = loop->basicInductionVars; biv; biv = biv->next) {
        for (use = reg_Uses[RegClass_GPR][biv->reg]; use; use = use->next) {
            user = Uses[use->id].pcode;
            if (!bitvectorgetbit(user->block->blockIndex, loop->memberblocks))
                continue;
            if (!isinductionvariable(biv, use->id, &multiplier, &bivIndex, &otherIndex, &outer))
                continue;
            addinductionvariable(biv, Uses[use->id].pcode, multiplier, bivIndex, otherIndex, outer);
        }
    }
}
/*
 * Collect derived induction variables for every loop in the sibling list
 * starting at `loop` (children first), skipping loops that have no basic
 * induction variable.
 */
static void findallnonbasicinductionvariables(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur; cur = cur->nextSibling) {
        if (cur->children)
            findallnonbasicinductionvariables(cur->children);
        if (cur->basicInductionVars)
            findnonbasicinductionvariables(cur);
    }
}
/*
 * Emit the code that gives the replacement register iv->x1E its starting
 * value ahead of the loop.
 *
 * Indexed load/store case (iv->x1A != 0): x1E starts as "other operand + biv".
 *   - biv initialised by "li 0" in the preheader      -> mr   x1E, other
 *   - biv initialised by "li k" there, k fits a short -> addi x1E, other, k
 *   - otherwise                                       -> add  x1E, other, biv
 *   The instruction is remembered in iv->instrC and iv->x1C becomes the
 *   other operand's register.
 *
 * Multiply/shift case (iv->x1A == 0):
 *   - no LI initializer: a copy of the original instruction, retargeted to
 *     x1E, is placed before the preheader's last instruction;
 *   - LI initializer: the constant (initial value * step) is loaded directly
 *     after the initializer, with LI or with LIS followed by ADDI.
 */
static void initializeinductionvariable(InductionVar *iv) {
BasicInductionVar *biv; // r31
PCode *instr; // r27
PCodeBlock *preheader; // r30
SInt32 value30; // r30
short reg29; // r29
short reg26; // r26
biv = iv->basicVar;
preheader = biv->loop->preheader;
if (iv->x1A) {
/* reg29: the non-biv address operand; reg26: the biv operand */
reg29 = iv->instr->args[iv->x1A].data.reg.reg;
reg26 = iv->instr->args[iv->x18].data.reg.reg;
instr = NULL;
if (
biv->initializer &&
biv->initializer->op == PC_LI &&
biv->initializer->block == preheader
)
{
if (biv->initializer->args[1].data.imm.value == 0)
instr = makepcode(PC_MR, iv->x1E, reg29);
else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value))
instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value);
}
if (!instr)
instr = makepcode(PC_ADD, iv->x1E, reg29, reg26);
/* MR/ADDI go right after the initializer; ADD goes before the last instruction of someloop's preheader when available, else of this loop's preheader */
if (biv->initializer && instr->op != PC_ADD)
insertpcodeafter(biv->initializer, instr);
else if (iv->someloop && iv->someloop->preheader->lastPCode)
insertpcodebefore(iv->someloop->preheader->lastPCode, instr);
else
insertpcodebefore(preheader->lastPCode, instr);
iv->instrC = instr;
iv->x1C = reg29;
return;
}
if (!biv->initializer || biv->initializer->op != PC_LI) {
/* start value not a known constant: recompute it once with a copy of the original instruction */
instr = copypcode(iv->instr);
instr->args[0].data.reg.reg = iv->x1E;
insertpcodebefore(preheader->lastPCode, instr);
} else {
value30 = biv->initializer->args[1].data.imm.value * iv->step;
if (!FITS_IN_SHORT(value30)) {
instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30));
insertpcodeafter(biv->initializer, instr);
/* presumably always true here, since 0 would have fitted in a short - confirm against FITS_IN_SHORT */
if (value30 != 0)
insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30)));
} else {
instr = makepcode(PC_LI, iv->x1E, value30);
insertpcodeafter(biv->initializer, instr);
}
}
}
/*
 * After every in-loop increment of the underlying basic induction variable,
 * add the matching increment (iv->step * biv->step) to the replacement
 * register iv->x1E. A value that fits in a short takes a single ADDI;
 * otherwise an ADDIS with the high part is emitted, followed by an ADDI
 * with the low part.
 */
static void incrementinductionvariable(InductionVar *iv) {
    BasicInductionVar *biv = iv->basicVar;
    SInt32 value = iv->step * biv->step;
    InstrList *node;
    PCode *added;

    for (node = biv->instrsC; node; node = node->next) {
        if (FITS_IN_SHORT(value)) {
            added = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value);
            insertpcodeafter(node->instr, added);
            continue;
        }

        added = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value));
        insertpcodeafter(node->instr, added);
        if (value != 0) {
            added = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value));
            /* goes after whatever now follows the biv increment */
            insertpcodeafter(node->instr->nextPCode, added);
        }
    }
}
/*
 * Make the original instruction use the replacement register iv->x1E.
 *
 * Loads/stores are rewritten in place: the opcode is lowered by 2, operand 1
 * becomes x1E and operand 2 becomes the immediate 0. Any other instruction
 * is replaced by "mr x1C, x1E".
 */
static void copyinductionvariable(InductionVar *iv) {
    PCode *target = iv->instr;

    if (!(target->flags & (fIsRead | fIsWrite))) {
        insertpcodeafter(target, makepcode(PC_MR, iv->x1C, iv->x1E));
        deletepcode(target);
        return;
    }

    /* relies on the opcode numbering: indexed form minus 2 is the form used with an immediate offset */
    target->op -= 2;
    target->args[1].data.reg.reg = iv->x1E;
    target->args[2].kind = PCOp_IMMEDIATE;
    target->args[2].data.imm.value = 0;
    target->args[2].data.imm.obj = NULL;
}
/*
 * Test whether the initialisation of inner-loop induction variable `iv` can
 * be taken over by an outer-loop variable living in GPR `reg`.
 *
 * Requirements:
 *   - iv has an initialising instruction (instrC) and iv->x1C == reg;
 *   - that instruction is MR (offset 0) or ADDI (offset = its immediate);
 *   - step2 == offset + step1 * iv->step * loop2->iterationCount;
 *   - no instruction in loop1's block list ahead of loop2's first listed
 *     block has a GPR register operand equal to `reg`.
 *
 * Returns 1 when all of them hold, 0 otherwise.
 */
static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) {
    SInt32 addend;
    BlockList *entry;
    PCode *pc;
    PCodeArg *op;
    int remaining;

    if (!iv->instrC || iv->x1C != reg)
        return 0;

    switch (iv->instrC->op) {
        case PC_MR:
            addend = 0;
            break;
        case PC_ADDI:
            addend = iv->instrC->args[2].data.imm.value;
            break;
        default:
            return 0;
    }

    if (step2 != (addend + (step1 * iv->step * loop2->iterationCount)))
        return 0;

    for (entry = loop1->blocks; entry && entry->block != loop2->blocks->block; entry = entry->next) {
        for (pc = entry->block->firstPCode; pc; pc = pc->nextPCode) {
            for (remaining = pc->argCount, op = pc->args; remaining--; op++) {
                if (op->kind != PCOp_REGISTER)
                    continue;
                if (op->arg != RegClass_GPR)
                    continue;
                if (op->data.reg.reg == reg)
                    return 0;
            }
        }
    }

    return 1;
}
/*
 * For an outer-loop variable in GPR `reg` that advances by `step` per
 * iteration of `loop`, look for induction variables of qualifying child
 * loops whose initialising instruction (iv->instrC) passes testnestediv(),
 * and move that instruction.
 *
 * A child loop qualifies when it is a known counting loop, has x4F set and
 * its header is flagged in loop->vec2C.
 *
 * Placement of the moved instruction:
 *   - directly after `initializer` when one is given;
 *   - otherwise somewhere in the outer loop's header (see the note below);
 *   - appended to the header when the header is empty.
 */
static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) {
Loop *scanloop;
BasicInductionVar *biv;
InductionVar *iv;
PCode *instr;
PCodeArg *op;
int i;
for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) {
if (
scanloop->isKnownCountingLoop &&
scanloop->x4F &&
bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C)
)
{
for (biv = scanloop->basicInductionVars; biv; biv = biv->next) {
for (iv = biv->inductionVars; iv; iv = iv->next) {
if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) {
/* unlink the initialising instruction from where it is now; it is re-inserted below */
deletepcode(iv->instrC);
if (initializer) {
insertpcodeafter(initializer, iv->instrC);
} else if (loop->body->lastPCode) {
/* walk the header backwards looking for an instruction that writes `reg` */
for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg
)
/* NOTE(review): this break leaves only the operand loop; the enclosing for keeps walking until instr is NULL, so the `if (instr)` branch below is never taken - confirm whether that is intended */
break;
op++;
}
}
if (instr)
insertpcodeafter(instr, iv->instrC);
else
insertpcodebefore(loop->body->firstPCode, iv->instrC);
} else {
appendpcode(loop->body, iv->instrC);
}
}
}
}
}
}
}
/*
 * Offer each register that advances linearly in biv's loop to
 * strengthreducenestediv(): first every derived induction variable's
 * replacement register (step = iv->step * biv->step, anchored at its
 * initialising instruction), then the basic induction variable itself.
 */
static void strengthreducenestedbiv(BasicInductionVar *biv) {
    Loop *owner = biv->loop;
    InductionVar *iv = biv->inductionVars;

    while (iv) {
        strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, owner);
        iv = iv->next;
    }

    strengthreducenestediv(biv->reg, biv->step, biv->initializer, owner);
}
/*
 * Strength-reduce the derived induction variables of `biv`.
 *
 * Each qualifying variable gets a replacement virtual GPR (iv->x1E) that is
 * initialised before the loop and incremented next to every biv increment;
 * the original instruction is then rewritten to use it. Later variables in
 * the list that can share the register are given the same x1E so they do not
 * allocate another one. Sets strengthreducedloops when anything is rewritten.
 */
static void strengthreduceinductionvariable(BasicInductionVar *biv) {
int counter;
InductionVar *iv;
InductionVar *otherIv;
short reg;
/* count the variables with multiplier 1 */
counter = 0;
for (iv = biv->inductionVars; iv; iv = iv->next) {
if (iv->step == 1)
counter++;
}
for (iv = biv->inductionVars; iv; iv = iv->next) {
/*
 * Skip multiplier-1 variables when there are more than four of them,
 * instructions whose block pointer is NULL, and memory instructions
 * whose operand 2 is already an immediate.
 */
if (
(counter <= 4 || iv->step != 1) &&
iv->instr->block &&
(iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE)
)
{
/* x1E == -1 means no replacement register has been assigned yet */
if (iv->x1E == -1) {
iv->x1E = used_virtual_registers[RegClass_GPR]++;
initializeinductionvariable(iv);
incrementinductionvariable(iv);
if (iv->step == 1) {
/* share with later memory variables whose x1A operand is this same register */
reg = iv->instr->args[iv->x1A].data.reg.reg;
for (otherIv = iv->next; otherIv; otherIv = otherIv->next) {
if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg)
otherIv->x1E = iv->x1E;
}
} else {
/* share with later variables that use the same multiplier */
for (otherIv = iv->next; otherIv; otherIv = otherIv->next) {
if (otherIv->step == iv->step)
otherIv->x1E = iv->x1E;
}
}
}
copyinductionvariable(iv);
strengthreducedloops = 1;
}
}
}
#ifdef __MWERKS__
#pragma options align=mac68k
#endif
/*
 * Summary of how a basic induction variable is initialised, as filled in by
 * calc_biv_init(). Two variables whose x4/x6/x8/xA match differ at most by
 * the constant x0.
 */
typedef struct BivInit {
/* immediate of the initializer when it is "addi biv, biv, imm"; 0 otherwise */
SInt32 x0;
/* first source register of the defining ADD/ADDI, or -1 when unknown */
short x4;
/* second source register of a defining ADD, or -1 */
short x6;
/* immediate value or memory offset of a defining ADDI; 0 otherwise */
short x8;
/* memory object of a defining ADDI with a PCOp_MEMORY operand, or NULL */
Object *xA;
} BivInit;
#ifdef __MWERKS__
#pragma options align=reset
#endif
/*
 * Describe how `biv` gets its initial value, in a form that lets two basic
 * induction variables be compared (see BivInit).
 *
 * `init` is first reset to x0 = 0, x4 = x6 = -1, x8 = 0, xA = NULL. Nothing
 * else is filled in unless the initializer is an ADDI or an ADD.
 *
 *   ADDI biv, biv, imm : x0 = imm, then search backwards for the previous
 *                        instruction writing biv->reg and record its
 *                        operands if it is an ADD or ADDI.
 *   ADDI biv, other, x : x4 = other, x8/xA = immediate or memory operand.
 *   ADD  biv, biv, r   : x6 = r, then search backwards for a previous ADDI
 *                        writing biv->reg and record its operands.
 *   ADD  biv, a, b     : x4 = a, x6 = b.
 */
static void calc_biv_init(BasicInductionVar *biv, BivInit *init) {
PCode *instr;
PCode *scan;
PCodeArg *op;
int i;
instr = biv->initializer;
init->x0 = 0;
init->x4 = -1;
init->x6 = -1;
init->x8 = 0;
init->xA = NULL;
if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD))
return;
if (instr->op == PC_ADDI) {
if (instr->args[1].data.reg.reg == biv->reg) {
/* self-increment: remember the constant and look further back for the real definition */
init->x0 = instr->args[2].data.imm.value;
for (scan = instr->prevPCode; scan; scan = scan->prevPCode) {
op = scan->args;
i = scan->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == biv->reg &&
(op->data.reg.effect & EffectWrite)
)
{
/* first earlier writer of biv->reg: record it if recognised, then stop either way */
if (scan->op == PC_ADD) {
init->x4 = scan->args[1].data.reg.reg;
init->x6 = scan->args[2].data.reg.reg;
} else if (scan->op == PC_ADDI) {
if (scan->args[2].kind == PCOp_IMMEDIATE) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.imm.value;
} else if (scan->args[2].kind == PCOp_MEMORY) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.mem.offset;
init->xA = scan->args[2].data.mem.obj;
}
}
return;
}
op++;
}
}
} else {
if (instr->args[2].kind == PCOp_IMMEDIATE) {
init->x4 = instr->args[1].data.reg.reg;
init->x8 = instr->args[2].data.imm.value;
} else if (instr->args[2].kind == PCOp_MEMORY) {
init->x4 = instr->args[1].data.reg.reg;
init->x8 = instr->args[2].data.mem.offset;
init->xA = instr->args[2].data.mem.obj;
}
}
} else if (instr->op == PC_ADD) {
if (instr->args[1].data.reg.reg == biv->reg) {
init->x6 = instr->args[2].data.reg.reg;
for (scan = instr->prevPCode; scan; scan = scan->prevPCode) {
op = scan->args;
i = scan->argCount;
while (i--) {
/* unlike the ADDI case above, only an ADDI writer ends this search; other writers of biv->reg are skipped */
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == biv->reg &&
(op->data.reg.effect & EffectWrite) &&
scan->op == PC_ADDI
)
{
if (scan->args[2].kind == PCOp_IMMEDIATE) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.imm.value;
} else if (scan->args[2].kind == PCOp_MEMORY) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.mem.offset;
init->xA = scan->args[2].data.mem.obj;
}
return;
}
op++;
}
}
} else {
init->x4 = instr->args[1].data.reg.reg;
init->x6 = instr->args[2].data.reg.reg;
}
}
}
/*
 * Replace basic induction variable biv1 by "biv2 + difference".
 *
 * The two variables advance by the same step and start a constant
 * `difference` apart, so biv1's own increment is dropped and biv1 is instead
 * recomputed from biv2 with a single "addi reg1, reg2, difference" placed at
 * the top of the block that follows the loop header.
 *
 * Nothing is changed when
 *   - `difference` does not fit in a short,
 *   - either register is written by more than one ADDI in the loop,
 *   - reg1 appears as a GPR operand of any instruction after its increment,
 *   - no increment of reg1 is found, or
 *   - the block to place the new instruction in cannot be determined.
 *
 * On success biv1->reg is set to -1 and strengthreducedloops to 1.
 */
static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) {
    PCode *instr1; // r31
    int reg1; // r30
    int reg2; // r29
    PCode *instr2; // r24
    PCodeBlock *nextBlock; // r24
    BlockList *list;
    PCodeArg *op;
    int i;
    PCode *instr;

    instr1 = NULL;
    instr2 = NULL;
    reg1 = biv1->reg;
    CError_ASSERT(930, reg1 >= 0);
    reg2 = biv2->reg;
    CError_ASSERT(934, reg2 >= 0);

    if (!FITS_IN_SHORT(difference))
        return;

    for (list = loop->blocks; list; list = list->next) {
        for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) {
            if (instr1) {
                /* once reg1's increment has been seen, any later mention of reg1 blocks the transformation */
                op = instr->args;
                i = instr->argCount;
                while (i--) {
                    if (
                        op->kind == PCOp_REGISTER &&
                        op->arg == RegClass_GPR &&
                        op->data.reg.reg == reg1
                    )
                        return;
                    op++;
                }
            }
            if (instr->op == PC_ADDI) {
                if (instr->args[0].data.reg.reg == reg1) {
                    if (instr1)
                        return;
                    instr1 = instr;
                } else if (instr->args[0].data.reg.reg == reg2) {
                    if (instr2)
                        return;
                    instr2 = instr;
                }
            }
        }
    }

    /* without an increment of reg1 there is nothing to move; it used to be dereferenced unchecked below */
    if (!instr1)
        return;

    /* destination block: the first label target of the header's terminating branch, or the fall-through block */
    if (loop->body->lastPCode && (loop->body->lastPCode->flags & fIsBranch)) {
        nextBlock = NULL;
        for (i = 0; i < loop->body->lastPCode->argCount; i++) {
            if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) {
                nextBlock = loop->body->lastPCode->args[i].data.label.label->block;
                break;
            }
        }
    } else {
        nextBlock = loop->body->nextBlock;
    }
    if (!nextBlock)
        return;

    /* unlink the increment, turn it into "addi reg1, reg2, difference" and re-insert it */
    deletepcode(instr1);
    instr1->args[1].data.reg.reg = reg2;
    instr1->args[2].data.imm.value = difference;
    if (nextBlock->firstPCode)
        insertpcodebefore(nextBlock->firstPCode, instr1);
    else
        appendpcode(nextBlock, instr1);

    biv1->reg = -1;
    strengthreducedloops = 1;
}
/*
 * Strength-reduce one loop.
 *
 * Pass 1: for every basic induction variable, reduce its derived variables
 *         and try to fold nested-loop initialisations.
 * Pass 2: for every pair of still-live basic induction variables that share
 *         the same step and the same initialisation apart from the constant
 *         x0, express the one with the larger x0 in terms of the other.
 *         A variable with reg == -1 has already been combined away.
 */
static void strengthreduceinductionvariables(Loop *loop) {
    BasicInductionVar *biv1;
    BasicInductionVar *biv2;
    BivInit init1;
    BivInit init2;

    for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) {
        if (biv1->inductionVars)
            strengthreduceinductionvariable(biv1);
        strengthreducenestedbiv(biv1);
    }

    for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) {
        if (biv1->reg == -1)
            continue;

        calc_biv_init(biv1, &init1);
        if (init1.x4 == -1)
            continue;

        for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) {
            if (biv2->reg == -1 || biv2 == biv1)
                continue;

            calc_biv_init(biv2, &init2);
            if (
                init2.x4 == -1 ||
                init1.x4 != init2.x4 ||
                init1.x6 != init2.x6 ||
                init1.x8 != init2.x8 ||
                init1.xA != init2.xA ||
                biv1->step != biv2->step
            )
                continue;

            if (init1.x0 < init2.x0) {
                combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0);
            } else {
                /* biv1 itself was the candidate for removal: stop pairing it */
                combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0);
                break;
            }
        }
    }
}
/*
 * Strength-reduce every loop in the sibling list starting at `loop`
 * (children first), skipping loops without basic induction variables.
 */
static void strengthreduceallinductionvariables(Loop *loop) {
    Loop *cur;

    for (cur = loop; cur; cur = cur->nextSibling) {
        if (cur->children)
            strengthreduceallinductionvariables(cur->children);
        if (cur->basicInductionVars)
            strengthreduceinductionvariables(cur);
    }
}
/*
 * Entry point of the strength-reduction pass. Clears strengthreducedloops
 * and, when the flow graph contains loops, computes use/def chains, finds
 * basic and derived induction variables, rewrites them, and finally releases
 * the o-heap.
 */
void strengthreduceloops(void) {
    strengthreducedloops = 0;

    if (!loopsinflowgraph)
        return;

    computeusedefchains(0);
    findallbasicinductionvariables(loopsinflowgraph);
    findallnonbasicinductionvariables(loopsinflowgraph);
    strengthreduceallinductionvariables(loopsinflowgraph);
    freeoheap();
}

View File

@@ -0,0 +1,661 @@
#include "compiler/ValueNumbering.h"
#include "compiler/Alias.h"
#include "compiler/PCode.h"
#include "compiler/Registers.h"
#include "compiler/RegisterInfo.h"
#include "compiler/CompilerTools.h"
#include "compiler/CError.h"
/*
 * One entry in the list of operands attached to an AvailableValue
 * (see labelvalue/unlabelvalue).
 */
typedef struct ValueLabel {
/* Next label on the same AvailableValue. */
struct ValueLabel *next;
/* Copy of the operand that was labelled. */
PCodeArg op;
} ValueLabel;
/*
 * A previously computed value that later instructions may reuse.
 * Instances are chained per opcode in opvalue[].
 */
typedef struct AvailableValue {
/* Next value recorded for the same opcode. */
struct AvailableValue *next;
/* Operands currently labelled with this value; NULL when none remain. */
ValueLabel *labelled;
/* Instruction that produced the value. */
PCode *pcode;
/* Value number returned by killregister() for the result operand when the
   entry was created; compared against valueceiling in regressvaluestate(). */
int killedregister;
/* Alias value number captured at creation; only written for instructions
   flagged fIsRead or fPCodeFlag20000. */
int aliasnumber;
/* Value numbers of the register operands, indexed by operand position.
   Storage for argCount entries is allocated together with the struct. */
int opnumbers[0];
} AvailableValue;
/* Value-numbering state of a single register. */
typedef struct RegValue {
/* Current value number held by the register. */
int number;
/* Register index this register was last copied from (set by copyregister),
   or 0 when there is none. */
int x4;
/* AvailableValue this register is labelled on, or NULL. */
AvailableValue *available;
} RegValue;
/* Snapshot of the two globals saved by getvaluestate()/setvaluestate(). */
typedef struct State {
/* Saved head of the stackedvalues list. */
void *stackedvalues;
/* Saved valueceiling. */
int valueceiling;
} State;
/*
 * Saved copy of a register's or memory alias's value state, restored by
 * unstackvalue(). op.kind == PCOp_MEMORY marks a memory entry.
 */
typedef struct StackedValue {
/* Next saved entry. */
struct StackedValue *next;
/* Operand that was saved; for memory entries only `kind` is set. */
PCodeArg op;
/* Saved register state; memory entries only set value.number. */
RegValue value;
/* Alias to restore (memory entries only). */
Alias *alias;
/* Saved alias->valuepcode (memory entries only). */
PCode *valuepcode;
} StackedValue;
/* Set to 1 whenever an instruction is deleted as a common subexpression. */
int removedcommonsubexpressions;
/* Next unused value number. */
int nextvaluenumber;
/* Per-opcode lists of available values; 428 is the table size used by every
   loop over this array. */
static AvailableValue *opvalue[428];
/* Per register class array of register states, sized by
   used_virtual_registers[] in allocatecsedatastructures(). */
static RegValue *regvalue[RegClassMax];
/* States saved since the last forkvaluestate(). */
static StackedValue *stackedvalues;
/* Value numbers below this ceiling are saved before being overwritten once
   nextvaluenumber has reached the ceiling. */
static int valueceiling;
/* Copy of the `flag` argument of removecommonsubexpressions(); enables the
   compare and LI/LIS cases in isCSEop(). */
static int moreaggressiveoptimization;
/*
 * Allocates one RegValue per register in use for every register class.
 */
static void allocatecsedatastructures(void) {
    char rclass = 0;

    while (rclass < RegClassMax) {
        regvalue[rclass] = oalloc(sizeof(RegValue) * used_virtual_registers[rclass]);
        rclass++;
    }
}
/*
 * Resets all value-numbering state: empties the per-opcode value lists,
 * gives every register a fresh value number with no copy source and no
 * available value, resets the alias values, and clears the save stack.
 */
static void initializecsedatastructures(void) {
    RegValue *slots;
    char rclass;
    int idx;

    nextvaluenumber = 0;

    for (idx = 0; idx < 428; idx++)
        opvalue[idx] = NULL;

    for (rclass = 0; rclass < RegClassMax; rclass++) {
        slots = regvalue[rclass];
        for (idx = 0; idx < used_virtual_registers[rclass]; idx++) {
            slots[idx].number = nextvaluenumber++;
            slots[idx].x4 = 0;
            slots[idx].available = NULL;
        }
    }

    initialize_alias_values();
    stackedvalues = NULL;
    valueceiling = 0x7FFFFFFF;
}
/*
 * Prepends a copy of operand `op` to the label list of `av`.
 */
static void labelvalue(AvailableValue *av, PCodeArg *op) {
    ValueLabel *node;

    node = oalloc(sizeof(ValueLabel));
    node->op = *op;
    node->next = av->labelled;
    av->labelled = node;
}
/*
 * Unlinks every label of `av` whose register number equals that of `op`.
 * Only the register number is compared. Unlinked nodes are not modified,
 * so a caller that is iterating the list may still follow their `next`.
 */
static void unlabelvalue(AvailableValue *av, PCodeArg *op) {
    ValueLabel **link;
    ValueLabel *cur;

    for (link = &av->labelled; (cur = *link) != NULL; ) {
        if (cur->op.data.reg.reg != op->data.reg.reg)
            link = &cur->next;
        else
            *link = cur->next;
    }
}
/*
 * Pushes a copy of a register operand and its current RegValue onto the
 * stackedvalues list so that unstackvalue() can restore it later.
 */
static void stackregistervalue(PCodeArg *op, RegValue *value) {
    StackedValue *entry;

    entry = oalloc(sizeof(StackedValue));
    entry->op = *op;
    entry->value = *value;
    entry->next = stackedvalues;
    stackedvalues = entry;
}
/*
 * Pushes the current value number and value pcode of `alias` onto the
 * stackedvalues list, tagged as a PCOp_MEMORY entry.
 */
static void stackmemoryvalue(Alias *alias) {
    StackedValue *entry;

    entry = oalloc(sizeof(StackedValue));
    entry->op.kind = PCOp_MEMORY;
    entry->alias = alias;
    entry->value.number = alias->valuenumber;
    entry->valuepcode = alias->valuepcode;
    entry->next = stackedvalues;
    stackedvalues = entry;
}
/*
 * Restores the state captured in `stacked`.
 *
 * Memory entries write the saved value number and pcode back into the
 * alias. Register entries drop the register's current label, restore the
 * saved RegValue and re-attach the label to the restored available value.
 */
static void unstackvalue(StackedValue *stacked) {
    PCodeArg *savedOp;
    RegValue *slot;

    if (stacked->op.kind == PCOp_MEMORY) {
        stacked->alias->valuenumber = stacked->value.number;
        stacked->alias->valuepcode = stacked->valuepcode;
        return;
    }

    savedOp = &stacked->op;
    slot = &regvalue[savedOp->arg][savedOp->data.reg.reg];

    if (slot->available)
        unlabelvalue(slot->available, savedOp);

    // RegValue consists of exactly number, x4 and available.
    *slot = stacked->value;

    if (slot->available)
        labelvalue(slot->available, savedOp);
}
/*
 * Returns non-zero when the two register operands currently carry the same
 * value number.
 */
static int samevalue(PCodeArg *op1, PCodeArg *op2) {
    RegValue *first = &regvalue[op1->arg][op1->data.reg.reg];
    RegValue *second = &regvalue[op2->arg][op2->data.reg.reg];

    return first->number == second->number;
}
/*
 * Invalidates the value held by register operand `op`: saves the old state
 * if it predates the current ceiling, detaches the register from its
 * available value, and assigns a fresh value number.
 *
 * Returns the new value number.
 */
static int killregister(PCodeArg *op) {
    RegValue *slot;
    int fresh;

    slot = &regvalue[op->arg][op->data.reg.reg];

    if (slot->number < valueceiling && nextvaluenumber >= valueceiling)
        stackregistervalue(op, slot);

    if (slot->available)
        unlabelvalue(slot->available, op);

    fresh = nextvaluenumber++;
    slot->available = NULL;
    slot->x4 = 0;
    slot->number = fresh;
    return fresh;
}
/*
 * Replaces the value associated with `alias`.
 *
 * With a non-NULL `newValue` the alias takes the value number of that
 * instruction's operand 0 and remembers the instruction; otherwise it gets
 * a fresh value number and no instruction. The previous state is saved
 * first when it predates the current ceiling.
 */
void killmemory(Alias *alias, PCode *newValue) {
    PCodeArg *stored;

    if (alias->valuenumber < valueceiling && nextvaluenumber >= valueceiling)
        stackmemoryvalue(alias);

    alias->valuepcode = newValue;
    if (!newValue) {
        alias->valuenumber = nextvaluenumber++;
        return;
    }

    stored = &newValue->args[0];
    alias->valuenumber = regvalue[stored->arg][stored->data.reg.reg].number;
}
/*
 * Kills every register labelled on any available value recorded for
 * opcode `op`.
 */
static void killspecificCSEs(short op) {
    AvailableValue *av;
    ValueLabel *label;

    av = opvalue[op];
    while (av) {
        label = av->labelled;
        while (label) {
            // killregister() unlinks labels from this list, so `next` is
            // read only after the call.
            killregister(&label->op);
            label = label->next;
        }
        av = av->next;
    }
}
/*
 * Kills every register labelled on any available value, for all opcodes.
 */
static void killallCSEs(void) {
    AvailableValue *av;
    ValueLabel *label;
    int opcode;

    for (opcode = 0; opcode < 428; opcode++) {
        av = opvalue[opcode];
        while (av) {
            label = av->labelled;
            while (label) {
                // killregister() unlinks labels from this list, so `next`
                // is read only after the call.
                killregister(&label->op);
                label = label->next;
            }
            av = av->next;
        }
    }
}
/*
 * Kills every register operand of `pcode` that the instruction writes.
 */
static void killregisters(PCode *pcode) {
    PCodeArg *op;
    int i;

    for (i = 0; i < pcode->argCount; i++) {
        op = &pcode->args[i];
        if (op->kind != PCOp_REGISTER)
            continue;
        if (op->data.reg.effect & EffectWrite)
            killregister(op);
    }
}
/*
 * Records that register `dest` now holds the same value as register `src`.
 *
 * The statements are order-sensitive: the old state of `dest` is saved and
 * unlabelled before it is overwritten, and the copy-source field (x4) is
 * computed last from `srcvalue`.
 */
static void copyregister(PCodeArg *src, PCodeArg *dest) {
RegValue *srcvalue;
RegValue *destvalue;
srcvalue = &regvalue[src->arg][src->data.reg.reg];
destvalue = &regvalue[dest->arg][dest->data.reg.reg];
// Save the old dest state if it predates the current ceiling.
if (destvalue->number < valueceiling && nextvaluenumber >= valueceiling)
stackregistervalue(dest, destvalue);
// Move dest's label from its old available value to src's.
if (destvalue->available)
unlabelvalue(destvalue->available, dest);
destvalue->available = srcvalue->available;
if (destvalue->available)
labelvalue(destvalue->available, dest);
destvalue->number = srcvalue->number;
// If src is itself a still-valid copy of another register, point dest at
// that register; otherwise point it at src.
if (srcvalue->x4 && srcvalue->number == regvalue[src->arg][srcvalue->x4].number)
destvalue->x4 = srcvalue->x4;
else
destvalue->x4 = src->data.reg.reg;
}
/*
 * Decides whether instruction `match` computes the same value as the
 * available value `av`.
 *
 * Operand 0 (the result) is never compared. Every other operand is compared
 * according to the kind of the corresponding operand of av->pcode:
 *   - registers: by value number recorded in av->opnumbers[],
 *   - memory:    by object, offset and arg,
 *   - immediate: by value,
 *   - label:     by label pointer.
 * For instructions flagged fIsRead or fPCodeFlag20000 the alias value number
 * must also be unchanged.
 *
 * The caller has already checked that opcode, flags and argCount agree.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int matchvalues(AvailableValue *av, PCode *match) {
    PCodeArg *avOp;
    PCodeArg *matchOp;
    int i;

    for (i = 1; i < match->argCount; i++) {
        avOp = &av->pcode->args[i];
        matchOp = &match->args[i];

        switch (avOp->kind) {
            case PCOp_REGISTER:
                // Like the other cases, reject a kind mismatch before using
                // kind-specific fields: regvalue[][] must only be indexed
                // with a real register operand.
                if (matchOp->kind != PCOp_REGISTER)
                    return 0;
                if (av->opnumbers[i] != regvalue[matchOp->arg][matchOp->data.reg.reg].number)
                    return 0;
                break;
            case PCOp_MEMORY:
                if (matchOp->kind != PCOp_MEMORY)
                    return 0;
                if (matchOp->data.mem.obj != avOp->data.mem.obj)
                    return 0;
                if (matchOp->data.mem.offset != avOp->data.mem.offset)
                    return 0;
                if ((unsigned char) matchOp->arg != (unsigned char) avOp->arg)
                    return 0;
                break;
            case PCOp_IMMEDIATE:
                if (matchOp->kind != PCOp_IMMEDIATE)
                    return 0;
                if (matchOp->data.imm.value != avOp->data.imm.value)
                    return 0;
                break;
            case PCOp_LABEL:
                if (matchOp->kind != PCOp_LABEL)
                    return 0;
                if (matchOp->data.label.label != avOp->data.label.label)
                    return 0;
                break;
            case PCOp_SYSREG:
                CError_FATAL(572);
                break;
            default:
                // Other operand kinds are not compared.
                break;
        }
    }

    if ((match->flags & (fIsRead | fPCodeFlag20000)) && match->alias->valuenumber != av->aliasnumber)
        return 0;
    return 1;
}
/*
 * Picks the operand from which the value `av` should be read and stores it
 * in `*op`. The label whose register number equals that of av->pcode's
 * result operand is preferred; otherwise the first label is used.
 *
 * The caller must ensure av->labelled is not NULL.
 */
static void chooselocation(AvailableValue *av, PCodeArg *op) {
    PCodeArg *result;
    ValueLabel *label;

    result = &av->pcode->args[0];
    for (label = av->labelled; label; label = label->next) {
        if (label->op.data.reg.reg == result->data.reg.reg) {
            *op = label->op;
            return;
        }
    }

    *op = av->labelled->op;
}
/*
 * Searches the values recorded for pcode->op for one that `pcode` would
 * recompute. Candidates must still have a label and must agree with `pcode`
 * in flags and argCount.
 *
 * For fCommutative instructions a failed match is retried with operands 1
 * and 2 of `pcode` swapped; the swap is undone if the retry also fails and
 * is kept if it succeeds.
 *
 * On success stores the operand holding the value in `*op` and returns 1;
 * returns 0 when nothing matches.
 */
static int findavailablevalue(PCode *pcode, PCodeArg *op) {
    AvailableValue *av;
    PCodeArg swap;
    int matched;

    for (av = opvalue[pcode->op]; av; av = av->next) {
        if (!av->labelled)
            continue;
        if (av->pcode->flags != pcode->flags)
            continue;
        if (av->pcode->argCount != pcode->argCount)
            continue;

        matched = matchvalues(av, pcode);
        if (!matched && (pcode->flags & fCommutative)) {
            swap = pcode->args[1];
            pcode->args[1] = pcode->args[2];
            pcode->args[2] = swap;

            matched = matchvalues(av, pcode);
            if (!matched) {
                swap = pcode->args[1];
                pcode->args[1] = pcode->args[2];
                pcode->args[2] = swap;
            }
        }

        if (matched) {
            chooselocation(av, op);
            return 1;
        }
    }

    return 0;
}
/*
 * Records the result of `pcode` as a new available value.
 *
 * Order matters: the operand value numbers (and the alias value number) are
 * captured before killregister() gives the result operand a fresh number.
 */
static void addavailablevalue(PCode *pcode) {
AvailableValue *av;
PCodeArg *op;
int i;
// The trailing opnumbers[] array gets one int per operand.
av = oalloc(sizeof(AvailableValue) + sizeof(int) * pcode->argCount);
av->labelled = NULL;
av->pcode = pcode;
// Only register operands get an entry; other slots stay unwritten.
for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
if (op->kind == PCOp_REGISTER)
av->opnumbers[i] = regvalue[op->arg][op->data.reg.reg].number;
}
// aliasnumber is only written for these flags; matchvalues() only reads it
// under the same flag test.
if (pcode->flags & (fIsRead | fPCodeFlag20000))
av->aliasnumber = pcode->alias->valuenumber;
// Operand 0 is the result: give it a new value number and label it.
op = &pcode->args[0];
av->killedregister = killregister(op);
labelvalue(av, op);
regvalue[op->arg][op->data.reg.reg].available = av;
// Push onto the per-opcode list.
av->next = opvalue[pcode->op];
opvalue[pcode->op] = av;
}
/*
 * Returns 1 when `pcode` may take part in common-subexpression elimination.
 *
 * Rejected are:
 *   - compares and LI/LIS unless moreaggressiveoptimization is set,
 *   - LI/LIS whose destination lies outside the GPR range
 *     [first_fe_temporary_register, last_temporary_register],
 *   - instructions flagged volatile, side-effecting, overflow, carry-setting
 *     or record-bit,
 *   - instructions where a later operand has the same kind, arg and
 *     register number as operand 0.
 */
static int isCSEop(PCode *pcode) {
    PCodeArg *result;
    PCodeArg *other;
    int i;

    result = &pcode->args[0];

    switch (pcode->op) {
        case PC_CMPI:
        case PC_CMP:
        case PC_CMPLI:
        case PC_CMPL:
        case PC_FCMPU:
        case PC_FCMPO:
            if (!moreaggressiveoptimization)
                return 0;
            break;
        case PC_LI:
        case PC_LIS:
            if (!moreaggressiveoptimization)
                return 0;
            if (pcode->args[0].data.reg.reg < first_fe_temporary_register[RegClass_GPR] || pcode->args[0].data.reg.reg > last_temporary_register[RegClass_GPR])
                return 0;
            break;
    }

    if (PCODE_FLAG_SET_F(pcode) & (fIsVolatile | fSideEffects | fOverflow | fSetsCarry | fRecordBit))
        return 0;

    // Operand 0 is the reference, so the scan starts at operand 1.
    for (i = 1; i < pcode->argCount; i++) {
        other = &pcode->args[i];
        if (other->kind != result->kind)
            continue;
        if (other->arg != result->arg)
            continue;
        if (other->data.reg.reg == result->data.reg.reg)
            return 0;
    }

    return 1;
}
/*
 * Returns non-zero when exactly one register operand of `pcode` is written.
 */
static int isCSEload(PCode *pcode) {
    int written = 0;
    int i;

    for (i = 0; i < pcode->argCount; i++) {
        if (pcode->args[i].kind != PCOp_REGISTER)
            continue;
        if (pcode->args[i].data.reg.effect & EffectWrite)
            written++;
    }

    return written == 1;
}
/*
 * Handles a move instruction (operand 0 = destination, operand 1 = source).
 * A move between registers that already hold the same value is deleted;
 * otherwise the copy is recorded.
 */
static void registercopy(PCode *pcode) {
    PCodeArg *dest = &pcode->args[0];
    PCodeArg *src = &pcode->args[1];

    if (!samevalue(src, dest)) {
        copyregister(src, dest);
        return;
    }

    deletepcode(pcode);
}
/*
 * Returns the instruction recorded as the current value of `alias` if the
 * register in its operand 0 still carries the alias's value number, copying
 * that operand to `*op`. Returns NULL otherwise.
 */
static PCode *recentlystored(Alias *alias, PCodeArg *op) {
    PCode *store;
    PCodeArg *stored;

    store = alias->valuepcode;
    if (!store)
        return NULL;

    stored = &store->args[0];
    if (alias->valuenumber != regvalue[stored->arg][stored->data.reg.reg].number)
        return NULL;

    *op = *stored;
    return store;
}
/*
 * Handles a non-pointer load.
 *
 * Volatile loads, and loads that do not write exactly one register, only
 * kill the registers they write. Otherwise the load is replaced by a
 * register copy when the value is already available, or when the value most
 * recently recorded for the alias can be reused; failing both, the load is
 * recorded as a new available value.
 */
static void simpleload(PCode *pcode) {
    PCodeArg *dest;
    PCodeArg source;
    PCode *store;
    int reusable;

    dest = &pcode->args[0];

    if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) {
        killregisters(pcode);
        return;
    }

    reusable = findavailablevalue(pcode, &source);
    if (!reusable) {
        store = recentlystored(pcode->alias, &source);
        reusable = store && can_reuse_stored_value(store, pcode);
    }

    if (!reusable) {
        addavailablevalue(pcode);
        return;
    }

    if (!samevalue(dest, &source)) {
        insertpcodebefore(pcode, makecopyinstruction(&source, dest));
        copyregister(&source, dest);
    }
    deletepcode(pcode);
    removedcommonsubexpressions = 1;
}
/*
 * Handles a non-pointer store: passes the store instruction itself to
 * update_alias_value() for its alias, then kills any registers the
 * instruction writes.
 */
static void simplestore(PCode *pcode) {
update_alias_value(pcode->alias, pcode);
killregisters(pcode);
}
/*
 * Handles a load through a pointer. Same as simpleload() except that it
 * never reuses a recently stored value.
 */
static void pointerload(PCode *pcode) {
    PCodeArg *dest;
    PCodeArg existing;

    dest = &pcode->args[0];

    if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) {
        killregisters(pcode);
        return;
    }

    if (!findavailablevalue(pcode, &existing)) {
        addavailablevalue(pcode);
        return;
    }

    if (!samevalue(dest, &existing)) {
        insertpcodebefore(pcode, makecopyinstruction(&existing, dest));
        copyregister(&existing, dest);
    }
    deletepcode(pcode);
    removedcommonsubexpressions = 1;
}
/*
 * Handles a store through a pointer: calls update_alias_value() for the
 * alias with no value instruction (NULL), then kills any registers the
 * instruction writes.
 */
static void pointerstore(PCode *pcode) {
update_alias_value(pcode->alias, NULL);
killregisters(pcode);
}
/*
 * Handles a CSE-eligible computation: if the value is already available the
 * instruction is replaced by a register copy (or dropped when the
 * destination already holds it); otherwise it is recorded as a new
 * available value.
 */
static void arithmeticop(PCode *pcode) {
    PCodeArg *result;
    PCodeArg existing;

    result = &pcode->args[0];

    if (!findavailablevalue(pcode, &existing)) {
        addavailablevalue(pcode);
        return;
    }

    if (!samevalue(result, &existing)) {
        insertpcodebefore(pcode, makecopyinstruction(&existing, result));
        copyregister(&existing, result);
    }
    deletepcode(pcode);
    removedcommonsubexpressions = 1;
}
/*
 * Handles a call: kills the registers it writes, then either updates only
 * the call's own alias (when `coloring` is zero) or updates all alias
 * values and kills every labelled register.
 */
static void functioncall(PCode *pcode) {
    killregisters(pcode);

    if (!coloring) {
        update_alias_value(pcode->alias, NULL);
        return;
    }

    update_all_alias_values();
    killallCSEs();
}
/*
 * Called for instructions flagged fPCodeFlag20000; raises fatal error 980
 * unconditionally and does nothing else.
 */
static void operatefrommemory(PCode *pcode) {
CError_FATAL(980);
}
/*
 * Called for instructions flagged fPCodeFlag40000; raises fatal error 1011
 * unconditionally and does nothing else.
 */
static void operatetomemory(PCode *pcode) {
CError_FATAL(1011);
}
/*
 * Copy propagation on the operands of `pcode`.
 *
 * A read-only register operand (EffectRead set; EffectWrite and Effect8
 * clear) whose register index is at or above n_real_registers for its class
 * is rewritten to the register it was copied from, provided that source is
 * also at or above n_real_registers and still carries the same value number.
 */
static void propagatecopiesto(PCode *pcode) {
    PCodeArg *op;
    RegValue *classvalues;
    RegValue *current;
    int i;

    for (i = 0; i < pcode->argCount; i++) {
        op = &pcode->args[i];

        if (op->kind != PCOp_REGISTER)
            continue;
        if ((op->data.reg.effect & (EffectRead | EffectWrite | Effect8)) != EffectRead)
            continue;
        if (op->data.reg.reg < n_real_registers[op->arg])
            continue;

        classvalues = regvalue[op->arg];
        current = &classvalues[op->data.reg.reg];

        if (!current->x4)
            continue;
        if (current->x4 < n_real_registers[op->arg])
            continue;
        if (current->number != classvalues[current->x4].number)
            continue;

        op->data.reg.reg = current->x4;
    }
}
/*
 * Runs value numbering over every instruction of `block` and marks the
 * block as visited.
 *
 * Each instruction first gets copy propagation applied to its operands and
 * is then dispatched on its flags. The tests are tried in the order written,
 * so an instruction matching several of them is handled by the first.
 */
static void removecsesfrombasicblock(PCodeBlock *block) {
PCode *pcode;
PCode *next;
for (pcode = block->firstPCode; pcode; pcode = next) {
// Fetched up front because the handlers may delete `pcode`.
next = pcode->nextPCode;
propagatecopiesto(pcode);
if (pcode->flags & fIsMove) {
registercopy(pcode);
} else if ((pcode->flags & fIsCall) && (pcode->flags & (fLink | fSideEffects))) {
functioncall(pcode);
} else if (pcode->flags & fIsRead) {
if (pcode->flags & fIsPtrOp)
pointerload(pcode);
else
simpleload(pcode);
} else if (pcode->flags & fIsWrite) {
if (pcode->flags & fIsPtrOp)
pointerstore(pcode);
else
simplestore(pcode);
} else if (pcode->flags & fPCodeFlag20000) {
operatefrommemory(pcode);
} else if (pcode->flags & fPCodeFlag40000) {
operatetomemory(pcode);
} else if ((pcode->flags & fIsCSE) && isCSEop(pcode)) {
arithmeticop(pcode);
} else {
// Anything else just invalidates the registers it writes.
killregisters(pcode);
}
}
block->flags |= fVisited;
}
/* Copies the current stackedvalues list head and valueceiling into `state`. */
static void getvaluestate(State *state) {
state->stackedvalues = stackedvalues;
state->valueceiling = valueceiling;
}
/* Restores stackedvalues and valueceiling from a State filled in by getvaluestate(). */
static void setvaluestate(State *state) {
stackedvalues = state->stackedvalues;
valueceiling = state->valueceiling;
}
/*
 * Starts a new save scope: empties the stackedvalues list and sets the
 * ceiling to `number`. The caller is expected to have saved the previous
 * state with getvaluestate().
 */
static void forkvaluestate(int number) {
stackedvalues = NULL;
valueceiling = number;
}
static void regressvaluestate(void) {
AvailableValue *av;
AvailableValue **ptr;
int i;
StackedValue *stacked;
for (i = 0; i < 428; i++) {
ptr = &opvalue[i];
while ((av = *ptr)) {
if (av->killedregister >= valueceiling)
*ptr = av->next;
else
ptr = &av->next;
}
}
for (stacked = stackedvalues; stacked; stacked = stacked->next)
unstackvalue(stacked);
}
/*
 * Runs value numbering over the extended basic block that starts at `block`.
 *
 * The block is processed, then the walk continues straight through any
 * chain of blocks where the current block has exactly one successor and
 * that successor has exactly one predecessor. At the end of the chain every
 * unvisited successor with a single predecessor is processed recursively,
 * each starting from the value state reached at the end of the chain.
 */
static void removecsesfromextendedbasicblock(PCodeBlock *block) {
PCLink *succ;
int counter;
State state;
removecsesfrombasicblock(block);
// Follow the straight-line chain.
while (block->successors &&
!block->successors->nextLink &&
block->successors->block->predecessors &&
!block->successors->block->predecessors->nextLink) {
block = block->successors->block;
removecsesfrombasicblock(block);
}
// Count the successors that can inherit this block's value state.
counter = 0;
for (succ = block->successors; succ; succ = succ->nextLink) {
if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink)
counter++;
}
if (counter) {
// Save the enclosing scope, then open a new one whose ceiling is the
// current value number so that changes made below can be undone.
getvaluestate(&state);
forkvaluestate(nextvaluenumber);
for (succ = block->successors; succ; succ = succ->nextLink) {
if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) {
removecsesfromextendedbasicblock(succ->block);
// Undo what the successor's subtree changed before trying the next one.
regressvaluestate();
}
}
setvaluestate(&state);
}
}
/*
 * Entry point of the common-subexpression pass.
 *
 * `proc` is not used by this function. `flag` is stored in
 * moreaggressiveoptimization. removedcommonsubexpressions is reset here and
 * set to 1 by the handlers whenever an instruction is removed.
 *
 * All blocks are first marked unvisited; each block that is still unvisited
 * then starts a new extended basic block with freshly initialised state.
 */
void removecommonsubexpressions(Object *proc, int flag) {
    PCodeBlock *block;

    removedcommonsubexpressions = 0;
    moreaggressiveoptimization = flag;

    gather_alias_info();
    allocatecsedatastructures();

    block = pcbasicblocks;
    while (block) {
        block->flags &= ~fVisited;
        block = block->nextBlock;
    }

    for (block = pcbasicblocks; block; block = block->nextBlock) {
        if (block->flags & fVisited)
            continue;
        initializecsedatastructures();
        removecsesfromextendedbasicblock(block);
    }

    freeoheap();
}

View File

@@ -0,0 +1,548 @@
#include "compiler/VectorArraysToRegs.h"
#include "compiler/CError.h"
#include "compiler/CFunc.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/Registers.h"
#include "compiler/UseDefChains.h"
#include "compiler/objects.h"
#include "compiler/types.h"
/*
 * A local array of vectors that is a candidate for being kept in vector
 * registers. Built by scanforlocalvectorarrays().
 */
typedef struct LocalVectorArray {
/* Next candidate. */
struct LocalVectorArray *next;
/* The local object. */
Object *object;
/* Set once any use is found that prevents conversion. */
unsigned int invalid:1;
/* Copied from the object's type size. */
SInt32 arraySize;
/* arraySize / 16; only 1..8 are accepted. */
SInt32 elementCount;
/* Sum of the per-element counters, filled in by
   convert_arrays_to_registers(). */
int totalUses;
/* One slot per element (the allocation is extended beyond the declared
   size). Holds a reference count while scanning and is later overwritten
   with the virtual vector register assigned to the element. */
int elements[1];
} LocalVectorArray;
/*
 * Per-block bit vectors for the ADDI data-flow computation; each vector has
 * one bit per recorded ADDI (see allocatevectorpropinfo).
 */
typedef struct VectorPropInfo {
/* ADDIs generated in the block (set in computelocalvectorpropinfo). */
UInt32 *use;
/* ADDIs from other blocks whose destination register the block writes. */
UInt32 *def;
/* Intersection of the predecessors' `out` vectors. */
UInt32 *in;
/* use | (in & ~def). */
UInt32 *out;
} VectorPropInfo;
/* One recorded ADDI instruction that takes the address of a candidate array element. */
typedef struct ADDI {
/* The ADDI instruction, or NULL if the slot was never filled. */
PCode *instr;
/* IDs (indices into Uses) of the uses of the ADDI's destination register
   collected by computeaddilist(). */
RegUseOrDef *list;
} ADDI;
/* Number of ADDI slots counted by scaninstructions(). */
static int number_of_ADDIs;
/* Array of number_of_ADDIs entries, allocated in computeaddilist(). */
static ADDI *ADDIs;
/* Per-block data-flow vectors, indexed by blockIndex. */
static VectorPropInfo *vectorpropinfo;
/* Per-block count of ADDI slots, indexed by blockIndex. */
static int *naddsinblock;
/* Per-block index of the block's first ADDI slot, indexed by blockIndex. */
static int *firstaddinblock;
/* Set to 1 when at least one load or store was rewritten. */
static Boolean converted_arrays;
static LocalVectorArray *scanforlocalvectorarrays(void) {
SInt32 elementCount;
LocalVectorArray *head;
LocalVectorArray *array;
ObjectList *list;
int i;
SInt32 arraySize;
head = NULL;
for (list = locals; list; list = list->next) {
if (
list->object &&
!(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
list->object->type &&
IS_TYPE_ARRAY(list->object->type) &&
IS_TYPE_VECTOR(TPTR_TARGET(list->object->type))
) {
arraySize = list->object->type->size;
elementCount = arraySize / 16;
if (elementCount > 0 && elementCount <= 8) {
array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray));
array->next = head;
head = array;
array->object = list->object;
array->arraySize = arraySize;
array->elementCount = elementCount;
array->totalUses = 0;
array->invalid = 0;
for (i = 0; i < elementCount; i++) {
array->elements[i] = 0;
}
}
}
}
return head;
}
/*
 * Returns the list entry describing `object`, or NULL if it is not in the
 * list.
 */
static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) {
    LocalVectorArray *entry;

    for (entry = arrays; entry != NULL; entry = entry->next) {
        if (entry->object == object)
            return entry;
    }

    return NULL;
}
/*
 * Scans every non-branch instruction for memory operands that refer to a
 * candidate array.
 *
 * A reference from anything other than an ADDI, or from an ADDI whose
 * destination register index is below n_real_registers[RegClass_GPR],
 * invalidates the array. Otherwise an ADDI slot is counted and the
 * referenced element's counter is incremented. Also fills naddsinblock[]
 * and firstaddinblock[] and sets number_of_ADDIs.
 */
static void scaninstructions(LocalVectorArray *arrays) {
PCodeBlock *block;
PCode *instr;
int counter;
int i;
PCodeArg *op;
LocalVectorArray *array;
int element;
naddsinblock = oalloc(sizeof(int) * pcblockcount);
memclrw(naddsinblock, sizeof(int) * pcblockcount);
firstaddinblock = oalloc(sizeof(int) * pcblockcount);
memclrw(firstaddinblock, sizeof(int) * pcblockcount);
number_of_ADDIs = 0;
for (block = pcbasicblocks; block; block = block->nextBlock) {
firstaddinblock[block->blockIndex] = number_of_ADDIs;
counter = 0;
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
op = instr->args;
i = instr->argCount;
while (i--) {
// Only PCOpMemory1 operands on a still-valid candidate array matter.
if (
op->kind == PCOp_MEMORY &&
(PCOpMemoryArg) op->arg == PCOpMemory1 &&
(array = lookup_vector_array_object(arrays, op->data.mem.obj)) &&
!array->invalid
)
{
if (instr->op != PC_ADDI) {
array->invalid = 1;
} else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) {
array->invalid = 1;
} else {
// The slot is counted per matching operand, before the element index is
// validated below.
number_of_ADDIs++;
counter++;
}
if (!array->invalid) {
element = op->data.mem.offset / 16;
// NOTE(review): only the upper bound is checked; a negative offset would
// pass this test. Confirm offsets are never negative here.
if (element < array->elementCount)
array->elements[element]++;
else
array->invalid = 1;
}
}
op++;
}
}
}
naddsinblock[block->blockIndex] = counter;
}
}
/*
 * Fills the ADDIs[] table: for every qualifying ADDI records the
 * instruction and the list of uses of its destination register that are
 * set in the running `vec` bit vector at that point.
 *
 * Each block is walked backwards starting from a copy of the block's
 * usevec1C vector (presumably the uses live at block exit; confirm against
 * the use/def chain code). Slots are filled from the block's last slot
 * downwards.
 */
static void computeaddilist(LocalVectorArray *arrays) {
PCodeBlock *block;
PCode *instr;
RegUseOrDef *list;
ADDI *addi;
UInt32 *vec;
LocalVectorArray *array;
UseOrDef *def;
int defID;
UseOrDef *use;
int useID;
// Zeroed so that slots never filled below have instr == NULL.
ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs);
memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs);
// One bit per use, rounded up to whole 32-bit words.
vec = oalloc(4 * ((number_of_Uses + 31) >> 5));
for (block = pcbasicblocks; block; block = block->nextBlock) {
if (naddsinblock[block->blockIndex]) {
bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses);
// Start at the block's last slot because instructions are visited in
// reverse order.
addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1];
for (instr = block->lastPCode; instr; instr = instr->prevPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
int reg; // r18
// NOTE(review): this test requires operand 2 to be the array reference and
// the array to be valid now, whereas scaninstructions() counted slots per
// matching operand while scanning; if the counts differ some slots stay
// empty.
if (
instr->op == PC_ADDI &&
(reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] &&
instr->args[2].kind == PCOp_MEMORY &&
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
!array->invalid
)
{
addi->instr = instr;
addi->list = NULL;
// Collect the uses of the destination register that are set in vec.
for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) {
if (bitvectorgetbit(list->id, vec)) {
RegUseOrDef *node = oalloc(sizeof(RegUseOrDef));
node->id = list->id;
node->next = addi->list;
addi->list = node;
}
}
addi--;
}
// Moving backwards past this instruction: a register it defines clears all
// uses of that register from vec ...
for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) {
if (def->v.kind == PCOp_REGISTER) {
RegUseOrDef *l;
for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next)
bitvectorclearbit(l->id, vec);
}
}
// ... and each register use made by this instruction is set.
for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) {
if (use->v.kind == PCOp_REGISTER)
bitvectorsetbit(useID, vec);
}
}
}
}
}
}
/*
 * Allocates the per-block VectorPropInfo table and, for each block, four
 * bit vectors with one bit per ADDI (rounded up to whole 32-bit words).
 */
static void allocatevectorpropinfo(void) {
    int vecBytes;
    int i;

    vecBytes = 4 * ((number_of_ADDIs + 31) >> 5);
    vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount);

    for (i = 0; i < pcblockcount; i++) {
        vectorpropinfo[i].use = oalloc(vecBytes);
        vectorpropinfo[i].def = oalloc(vecBytes);
        vectorpropinfo[i].in = oalloc(vecBytes);
        vectorpropinfo[i].out = oalloc(vecBytes);
    }
}
/*
 * Computes the per-block `use` and `def` bit vectors.
 *
 * For each GPR write in a block, every recorded ADDI with the same
 * destination register is either cleared from `use` (ADDI in this block) or
 * set in `def` (ADDI in another block). Each qualifying ADDI met in the
 * block then sets its own bit in `use`.
 */
static void computelocalvectorpropinfo(LocalVectorArray *arrays) {
VectorPropInfo *info;
PCodeBlock *block;
PCode *instr;
UInt32 *vec0;
UInt32 *vec4;
int index;
PCodeArg *op;
int i;
int addi_i;
ADDI *addi;
LocalVectorArray *array;
for (block = pcbasicblocks; block; block = block->nextBlock) {
info = &vectorpropinfo[block->blockIndex];
// vec0 is the block's `use` vector, vec4 its `def` vector.
vec0 = info->use;
vec4 = info->def;
bitvectorinitialize(vec0, number_of_ADDIs, 0);
bitvectorinitialize(vec4, number_of_ADDIs, 0);
// ADDI bits are assigned in forward order starting at the block's first slot.
index = firstaddinblock[block->blockIndex];
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
i = instr->argCount;
op = instr->args;
while (i--) {
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) {
// A write to a GPR affects every ADDI that targets the same register.
for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) {
if (
addi->instr &&
addi->instr->args[0].arg == op->arg &&
addi->instr->args[0].data.reg.reg == op->data.reg.reg
)
{
if (addi->instr->block == block)
bitvectorclearbit(addi_i, vec0);
else
bitvectorsetbit(addi_i, vec4);
}
}
}
op++;
}
// Same qualification test as in computeaddilist(), minus the register
// range check.
if (
instr->op == PC_ADDI &&
instr->args[2].kind == PCOp_MEMORY &&
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
!array->invalid
)
{
bitvectorsetbit(index, vec0);
index++;
}
}
}
}
}
/*
 * Iterates the per-block `in`/`out` vectors to a fixed point:
 *   in  = intersection of the predecessors' out vectors
 *   out = use | (in & ~def)
 * Blocks are visited in depthfirstordering[] order until no `out` word
 * changes.
 */
static void computeglobalvectorpropinfo(void) {
VectorPropInfo *info;
PCodeBlock *block;
UInt32 *vec0;
UInt32 *vec4;
UInt32 *vec8;
UInt32 *vecC;
int bitvecsize;
int blockIndex;
int i;
int j;
int flag;
PCLink *preds;
UInt32 val;
// Number of 32-bit words per vector.
bitvecsize = (number_of_ADDIs + 31) >> 5;
flag = 1;
// First block: in is empty, out starts as use.
info = &vectorpropinfo[pcbasicblocks->blockIndex];
bitvectorinitialize(info->in, number_of_ADDIs, 0);
bitvectorcopy(info->out, info->use, number_of_ADDIs);
// Every other block: out starts as the complement of def.
for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
info = &vectorpropinfo[block->blockIndex];
vecC = info->out;
vec4 = info->def;
for (i = 0; i < bitvecsize; vecC++, vec4++, i++)
*vecC = ~*vec4;
}
while (flag) {
flag = 0;
for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) {
if (depthfirstordering[blockIndex]) {
info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex];
// NOTE(review): `in` is only written here when the block has predecessors,
// and only the first block's `in` was initialised above.
if ((preds = depthfirstordering[blockIndex]->predecessors)) {
vec8 = info->in;
bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
for (preds = preds->nextLink; preds; preds = preds->nextLink)
bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
}
// vecC = out, vec8 = in, vec0 = use, vec4 = def.
vecC = info->out;
vec8 = info->in;
vec0 = info->use;
vec4 = info->def;
for (j = 0; j < bitvecsize; j++) {
val = *vec0 | (*vec8 & ~*vec4);
if (val != *vecC) {
*vecC = val;
flag = 1;
}
vec8++;
vecC++;
vec4++;
vec0++;
}
}
}
}
}
/*
 * Returns 1 when `b` can be reached from `a` by following nextPCode links
 * (starting after `a` itself), 0 otherwise.
 */
static int precedes(PCode *a, PCode *b) {
    PCode *cursor;

    cursor = a->nextPCode;
    while (cursor && cursor != b)
        cursor = cursor->nextPCode;

    return cursor != NULL;
}
static int checkvectorstoreorload(int addiID, int useID) {
PCode *addiInstr;
UseOrDef *use;
addiInstr = ADDIs[addiID].instr;
use = Uses + useID;
if (!addiInstr)
return 0;
if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR])
return 0;
if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX)
return 0;
if (
use->pcode->args[1].kind != PCOp_REGISTER ||
use->pcode->args[1].arg != RegClass_GPR ||
use->pcode->args[1].data.reg.reg != 0
)
return 0;
return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg;
}
/*
 * Verifies that every recorded use of ADDI `addiID` passes
 * checkvectorstoreorload(). On the first failing use the array referenced
 * by the ADDI is marked invalid and 0 is returned; otherwise returns 1.
 */
static int checkalluses(LocalVectorArray *arrays, int addiID) {
    ADDI *addi;
    RegUseOrDef *use;
    LocalVectorArray *owner;

    addi = &ADDIs[addiID];
    for (use = addi->list; use; use = use->next) {
        if (checkvectorstoreorload(addiID, use->id))
            continue;

        owner = lookup_vector_array_object(arrays, addi->instr->args[2].data.mem.obj);
        owner->invalid = 1;
        return 0;
    }

    return 1;
}
/*
 * Rewrites every recorded use of ADDI `addiID` so that it accesses the
 * vector register assigned to the array element instead of memory, then
 * deletes the ADDI.
 *
 *   LVX  vD, 0, rA  becomes  VMR vD, vElem
 *   STVX vS, 0, rA  becomes  VMR vElem, vS
 *
 * Does nothing when the slot is empty, the array is not in `arrays` or is
 * invalid, or the element index falls outside the array.
 */
static void convert_array_to_register(LocalVectorArray *arrays, int addiID) {
    ADDI *addi;
    int newReg;
    RegUseOrDef *list;
    PCode *instr;
    PCode *useInstr;
    LocalVectorArray *array;
    int element;

    addi = ADDIs + addiID;
    if (!(instr = addi->instr))
        return;

    array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj);
    if (!array || array->invalid)
        return;

    // elements[] has elementCount entries, so the valid indices are
    // 0 .. elementCount - 1. The previous `>` test let
    // element == elementCount through.
    element = instr->args[2].data.mem.offset / 16;
    if (element < 0 || element >= array->elementCount)
        return;

    newReg = array->elements[element];
    for (list = addi->list; list; list = list->next) {
        useInstr = Uses[list->id].pcode;
        if (useInstr->op == PC_LVX) {
            converted_arrays = 1;
            change_opcode(useInstr, PC_VMR);
            change_num_operands(useInstr, 2);
            // Destination stays in args[0]; the source becomes the element register.
            useInstr->args[1].kind = PCOp_REGISTER;
            useInstr->args[1].arg = RegClass_VR;
            useInstr->args[1].data.reg.reg = newReg;
            useInstr->args[1].data.reg.effect = EffectRead;
        } else if (useInstr->op == PC_STVX) {
            converted_arrays = 1;
            change_opcode(useInstr, PC_VMR);
            change_num_operands(useInstr, 2);
            // The stored register moves to the source slot; the element
            // register becomes the destination.
            useInstr->args[1] = useInstr->args[0];
            useInstr->args[0].kind = PCOp_REGISTER;
            useInstr->args[0].arg = RegClass_VR;
            useInstr->args[0].data.reg.reg = newReg;
            useInstr->args[0].data.reg.effect = EffectWrite;
        } else {
            CError_FATAL(661);
        }
    }

    deletepcode(addi->instr);
}
/*
 * Final stage of the pass:
 *   1. validates the uses of every ADDI (which may invalidate arrays),
 *   2. removes invalid arrays from the list and totals the use counters of
 *      the remaining ones,
 *   3. while more than 32 elements remain, drops the array with the fewest
 *      total uses (the first such array on ties),
 *   4. assigns a new virtual vector register to every remaining element,
 *   5. rewrites the loads and stores.
 */
static void convert_arrays_to_registers(LocalVectorArray *arrays) {
    int i;
    int counter;
    LocalVectorArray **ptr;
    LocalVectorArray *array;
    LocalVectorArray *best;

    for (i = 0; i < number_of_ADDIs; i++)
        checkalluses(arrays, i);

    // Prune invalid arrays. `ptr` always addresses the link that points at
    // the current node. It has to advance past each array that is kept;
    // otherwise unlinking a later invalid array rewrites the list head and
    // silently drops the valid arrays in front of it.
    counter = 0;
    ptr = &arrays;
    while ((array = *ptr)) {
        if (array->invalid) {
            *ptr = array->next;
            continue;
        }
        counter += array->elementCount;
        for (i = 0; i < array->elementCount; i++)
            array->totalUses += array->elements[i];
        ptr = &array->next;
    }

    // Evict the least used arrays until at most 32 elements remain.
    while (arrays && counter > 32) {
        best = arrays;
        for (array = arrays->next; array; array = array->next) {
            if (array->totalUses < best->totalUses)
                best = array;
        }

        for (ptr = &arrays; *ptr != best; ptr = &(*ptr)->next)
            ;
        *ptr = best->next;
        counter -= best->elementCount;
    }

    if (!arrays)
        return;

    // From here on elements[] holds register numbers instead of use counts.
    for (array = arrays; array; array = array->next) {
        for (i = 0; i < array->elementCount; i++)
            array->elements[i] = used_virtual_registers[RegClass_VR]++;
    }

    for (i = 0; i < number_of_ADDIs; i++)
        convert_array_to_register(arrays, i);
}
/*
 * Entry point of the pass that keeps small local vector arrays in vector
 * registers. Returns non-zero when at least one load or store was rewritten.
 * The optimiser heap is released on every path.
 */
int vectorarraystoregs(void) {
    LocalVectorArray *arrays;

    converted_arrays = 0;

    arrays = scanforlocalvectorarrays();
    if (!arrays)
        goto done;

    scaninstructions(arrays);
    if (number_of_ADDIs <= 0)
        goto done;

    computeusedefchains(0);
    computeaddilist(arrays);
    allocatevectorpropinfo();
    computelocalvectorpropinfo(arrays);
    computedepthfirstordering();
    computeglobalvectorpropinfo();
    convert_arrays_to_registers(arrays);

done:
    freeoheap();
    return converted_arrays;
}

View File

@@ -0,0 +1,393 @@
#include "compiler/FuncLevelAsmPPC.h"
#include "compiler/CCompiler.h"
#include "compiler/CError.h"
#include "compiler/CFunc.h"
#include "compiler/CMangler.h"
#include "compiler/CParser.h"
#include "compiler/CPrepTokenizer.h"
#include "compiler/CodeGen.h"
#include "compiler/Coloring.h"
#include "compiler/CompilerTools.h"
#include "compiler/DumpIR.h"
#include "compiler/InlineAsmPPC.h"
#include "compiler/InlineAsmRegisters.h"
#include "compiler/ObjGenMachO.h"
#include "compiler/PCode.h"
#include "compiler/PCodeAssembly.h"
#include "compiler/PCodeListing.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/PPCError.h"
#include "compiler/RegisterInfo.h"
#include "compiler/StackFrame.h"
#include "compiler/TOC.h"
#include "compiler/objects.h"
/* First entry point recorded for the function being assembled, or NULL. */
static EntryPoint *entrypoints_head;
/* Address of the link where the next entry point is appended
   (see FuncAsm_AddEntryPoint). */
static EntryPoint **entrypoints_tail;
/*
 * Prepares argument `obj` of a function-level assembly routine and marks
 * its VarInfo as used.
 *
 * Without a frame (requires_frame == 0) a register object is bound to the
 * incoming register `reg` and published to the inline assembler under its
 * own name; error CErrorStr263 is reported when `reg` is 0. 8-byte values
 * are only bound when reg < 10 and use the pair reg/reg+1, ordered by
 * copts.littleendian.
 *
 * With a frame, a register object that has no register yet gets one from
 * assign_register_by_type(); CErrorStr263 is reported if VarInfoFlag2 is
 * not set afterwards.
 */
void setup_assembly_argument(Object *obj, short reg) {
    VarInfo *vi;
    Type *type;

    vi = Registers_GetVarInfo(obj);
    type = obj->type;
    vi->used = 1;

    if (requires_frame) {
        if (!is_register_object(obj))
            return;

        vi = Registers_GetVarInfo(obj);
        if (vi->reg)
            return;

        assign_register_by_type(obj);
        if (vi->flags & VarInfoFlag2)
            InlineAsm_InsertRegister(obj->name->name, vi->rclass, vi->reg, obj);
        else
            CError_Error(CErrorStr263, obj->name->name);
        return;
    }

    if (!is_register_object(obj))
        return;

    if (!reg)
        CError_Error(CErrorStr263, obj->name->name);

    if (TYPE_IS_8BYTES(type)) {
        short regLo;
        short regHi;

        if (reg >= 10)
            return;

        if (copts.littleendian) {
            regLo = reg;
            regHi = reg + 1;
        } else {
            regLo = reg + 1;
            regHi = reg;
        }
        retain_GPR_pair(obj, regLo, regHi);
        InlineAsm_InsertRegister(obj->name->name, RegClass_GPR, regLo, obj);
        return;
    }

    if (IS_TYPE_FLOAT(type)) {
        retain_register(obj, RegClass_FPR, reg);
        InlineAsm_InsertRegister(obj->name->name, RegClass_FPR, reg, obj);
        return;
    }

    if (IS_TYPE_VECTOR(type)) {
        retain_register(obj, RegClass_VR, reg);
        InlineAsm_InsertRegister(obj->name->name, RegClass_VR, reg, obj);
        return;
    }

    retain_register(obj, RegClass_GPR, reg);
    InlineAsm_InsertRegister(obj->name->name, RegClass_GPR, reg, obj);
}
void assign_local_addresses(void) {
VarInfo *vi;
ObjectList *list;
Object *object;
for (list = locals; list; list = list->next) {
vi = CodeGen_GetNewVarInfo();
list->object->u.var.info = vi;
list->object->flags |= OBJECT_USED;
vi->used = 1;
}
for (list = locals; list; list = list->next) {
object = list->object;
if (is_register_object(object)) {
vi = Registers_GetVarInfo(object);
if (!vi->reg) {
assign_register_by_type(object);
if (!(vi->flags & VarInfoFlag2))
CError_Error(CErrorStr263, object->name->name);
else
InlineAsm_InsertRegister(object->name->name, vi->rclass, vi->reg, object);
}
}
}
for (list = locals; list; list = list->next) {
object = list->object;
if (OBJECT_REG(object) == 0)
assign_local_memory(object);
}
}
/*
 * Consumes the assembler directives at the start of a function-level asm
 * body. Scanning stops after a FrAlloc directive (sets requires_frame) or a
 * NoFrAlloc directive (sets user_responsible_for_frame), or at the first
 * token that is not a directive.
 *
 * in_assembler and cprep_eoltokens are forced to 1 while scanning;
 * in_assembler is cleared and cprep_eoltokens restored on exit, including
 * after a jump back to InlineAsm_assemblererror.
 */
static void FuncAsm_PreScanDirectives(void) {
SInt32 directive;
Boolean save_eoltokens;
in_assembler = 1;
save_eoltokens = cprep_eoltokens;
cprep_eoltokens = 1;
// A non-zero setjmp result skips the loop and falls through to the cleanup
// below.
if (setjmp(InlineAsm_assemblererror) == 0) {
while (tk == TK_IDENTIFIER && (directive = InlineAsm_IsDirective(AssemblerType_1))) {
InlineAsm_ProcessDirective(directive);
// A directive must be followed by ';' or end of line.
if (tk == ';' || tk == TK_EOL) {
CPrep_TokenStreamFlush();
tk = lex();
} else {
InlineAsm_SyntaxError(CErrorStr113);
}
if (directive == IADirective_FrAlloc) {
requires_frame = 1;
break;
} else if (directive == IADirective_NoFrAlloc) {
user_responsible_for_frame = 1;
break;
}
}
}
in_assembler = 0;
cprep_eoltokens = save_eoltokens;
}
/*
 * Appends a new entry point to the entry point list. The object comes from
 * the IAEntryPoint stored in stmt->expr, the code location is `block`, and
 * `block` is tagged with fPCBlockFlag8000.
 */
static void FuncAsm_AddEntryPoint(Statement *stmt, PCodeBlock *block) {
    IAEntryPoint *source;
    EntryPoint *entry;

    source = (IAEntryPoint *) stmt->expr;

    entry = lalloc(sizeof(EntryPoint));
    memclrw(entry, sizeof(EntryPoint));
    entry->object = source->x8;
    entry->block = block;

    // Append through the tail link, then move the tail to the new node.
    *entrypoints_tail = entry;
    entrypoints_tail = &entry->next;

    block->flags |= fPCBlockFlag8000;
}
// Drives code generation for a function whose body is written entirely in
// assembly: pre-scans directives, parses the body up to '}', translates each
// ST_ASM statement to PCode, colours registers, emits prologue/epilogue
// (unless disabled via asm_alloc_flags[1]) and finally assembles the function.
// Returns early, without assembling, whenever `fatalerrors` is set.
// NOTE(review): `func` is NULL-checked before PrintProgressFunction but
// dereferenced unconditionally afterwards (func->qual, func->flags, ...);
// presumably callers never pass NULL - confirm.
void Assembler(Object *func) {
PCodeBlock *block;
Statement *stmt;
Boolean flag17;
Boolean flag16;
char *name;
InlineAsm *ia;
Boolean save_unusedvar;
Boolean save_unusedarg;
// flag17: set once the frfree handling below has generated prologue/epilogue.
// flag16: set when a frfree directive has been seen (a second one is an error).
flag17 = 0;
flag16 = 0;
init_endian();
init_stack_globals(func);
memclrw(asm_alloc_flags, sizeof(asm_alloc_flags));
fralloc_parameter_area_size = 0;
user_responsible_for_frame = 0;
assembledinstructions = 0;
entrypoints_head = NULL;
entrypoints_tail = &entrypoints_head;
stmt = curstmt;
if (func && func->name)
PrintProgressFunction(func->name->name);
CodeGen_InitialSanityCheck();
if (func->qual & Q_INLINE)
PPCError_Warning(PPCErrorStr173);
CheckCLabels();
if (fatalerrors)
return;
if (copts.filesyminfo)
CPrep_SetSourceFile(&cparser_fileoffset);
sm_section = SECT_TEXT;
initpcode();
// Create the prologue block and the first body block, and branch between them.
pclabel(prologue = makepcblock(), makepclabel());
pclabel(block = makepcblock(), makepclabel());
pcbranch(prologue, block->labels);
resetTOCvarinfo();
InlineAsm_InitializePPC();
FuncAsm_PreScanDirectives();
disable_optimizer = 1;
init_registers();
assign_arguments_to_memory(func, 0, 0);
init_frame_sizes(0);
if (copts.debuglisting)
DumpIR(stmt, func);
// Parse the assembly body with unused-variable/argument warnings disabled;
// the saved settings are restored after labels have been assigned.
cprep_eoltokens = 1;
in_assembler = 1;
save_unusedvar = copts.warn_unusedvar;
save_unusedarg = copts.warn_unusedarg;
copts.warn_unusedvar = 0;
copts.warn_unusedarg = 0;
InlineAsm_ScanFunction('}');
expandTOCreferences(&stmt->next);
if (!anyerrors && copts.debuglisting)
DumpIR(stmt, func);
in_assembler = 0;
cprep_eoltokens = 0;
name = CMangler_GetLinkName(func)->name;
func->flags |= OBJECT_DEFINED;
if (fralloc_parameter_area_size)
update_out_param_size(fralloc_parameter_area_size);
if (!user_responsible_for_frame)
process_arguments(move_assigned_argument, 0);
branch_label(makepclabel());
assign_labels(stmt->next);
copts.warn_unusedvar = save_unusedvar;
copts.warn_unusedarg = save_unusedarg;
// Translate every statement that follows the one that was current on entry.
for (stmt = stmt->next; stmt; stmt = stmt->next) {
current_statement = stmt;
switch (stmt->type) {
case ST_ASM:
if ((ia = (InlineAsm *) stmt->expr)) {
if (ia->flags & IAFlag1) {
// IAFlag1 statements are directives; only entry and frfree are handled here.
if (ia->opcode == IADirective_Entry) {
branch_label(makepclabel());
FuncAsm_AddEntryPoint(stmt, pclastblock);
} else if (ia->opcode == IADirective_FrFree) {
if (flag16)
PPCError_Error(PPCErrorStr188);
else
flag16 = 1;
asm_alloc_flags[3] = 1;
asm_alloc_flags[4] = 1;
// frfree: colour registers and generate prologue/epilogue now, with the
// epilogue placed in the block that is current at this point.
branch_label(makepclabel());
epilogue = pclastblock;
pclastblock->flags |= fIsEpilogue;
CheckCLabels();
if (fatalerrors)
return;
pccomputepredecessors();
if (copts.debuglisting)
pclistblocks(name, "[FUNCTION-LEVEL ASM] INITIAL CODE");
colorinstructions(func);
if (copts.debuglisting)
pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER REGISTER COLORING");
compute_frame_sizes();
generate_prologue(prologue, 0);
epilogue = pclastblock;
generate_epilogue(epilogue, 0);
if (copts.debuglisting)
pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER PROLOGUE/EPILOGUE CREATION");
flag17 = 1;
}
} else {
// Ordinary instruction: translated into its own block.
branch_label(makepclabel());
asm_alloc_flags[6] = 0;
asm_alloc_flags[7] = 0;
InlineAsm_TranslateIRtoPCode(stmt);
asm_alloc_flags[4] = 0;
}
}
break;
case ST_LABEL:
if (!stmt->label->pclabel->resolved)
branch_label(stmt->label->pclabel);
break;
default:
// Only ST_ASM and ST_LABEL statements are expected here.
CError_FATAL(525);
}
}
current_statement = NULL;
if (fatalerrors)
return;
CheckCLabels();
if (fatalerrors)
return;
// No frfree directive was processed: colour registers and build the frame
// here, with the epilogue in a new trailing block.
if (!flag17) {
branch_label(makepclabel());
epilogue = pclastblock;
pclastblock->flags |= fIsEpilogue;
pccomputepredecessors();
if (copts.debuglisting)
pclistblocks(name, "[FUNCTION-LEVEL ASM] INITIAL CODE");
if (!asm_alloc_flags[1]) {
colorinstructions(func);
if (fatalerrors)
return;
if (copts.debuglisting)
pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER REGISTER COLORING");
}
compute_frame_sizes();
if (asm_alloc_flags[1])
no_frame_for_asm();
if (fatalerrors)
return;
if (!asm_alloc_flags[1]) {
generate_prologue(prologue, 0);
generate_epilogue(epilogue, !asm_alloc_flags[6] && !asm_alloc_flags[7]);
}
if (copts.debuglisting)
pclistblocks(name, "[FUNCTION-LEVEL ASM] AFTER PROLOGUE/EPILOGUE CREATION");
}
if (fatalerrors)
return;
// Diagnostics for functions that need a frame, keyed on asm_alloc_flags.
if (!asm_alloc_flags[1] && needs_frame()) {
if (asm_alloc_flags[3]) {
if (!asm_alloc_flags[5] || !asm_alloc_flags[6])
PPCError_Warning(PPCErrorStr187, "blr");
if (asm_alloc_flags[8])
PPCError_Warning(PPCErrorStr186);
} else {
PPCError_Warning(PPCErrorStr185, "blr");
}
}
func->section = sm_section;
if (copts.filesyminfo)
symdeclend = CPrep_GetFileOffsetInfo(&cparser_fileoffset);
// NOTE(review): copts.peephole is cleared here and not restored in this function.
copts.peephole = 0;
if (pic_base_label)
pic_base_pcodelabel = pic_base_label->pclabel;
assemblefunction(func, entrypoints_head);
if (copts.debuglisting)
pclistblocks(CMangler_GetLinkName(func)->name, "[FUNCTION-LEVEL ASM] FINAL CODE");
CFunc_WarnUnused();
}
// No-op: the body contains no statements.
void SetupAssembler(void) {
}
// No-op: the body contains no statements.
void CleanupAssembler(void) {
}

View File

@@ -0,0 +1,230 @@
#include "compiler/GCCInlineAsm.h"
#include "compiler/CError.h"
#include "compiler/CExpr.h"
#include "compiler/CFunc.h"
#include "compiler/CInt64.h"
#include "compiler/CParser.h"
#include "compiler/CPrep.h"
#include "compiler/CPrepTokenizer.h"
#include "compiler/InlineAsm.h"
#include "compiler/objects.h"
// Statement where gcc_replace_arg starts its walk; gcc_parse_name inserts
// temporary-assignment statements before it.
Statement *first_ST_ASM;
// Operands collected by gcc_parse_name; entries 0..gcc_name_list_index are valid.
IALookupResult gcc_name_list[20];
// Index of the last valid entry in gcc_name_list; InlineAsm_gcc_parse resets it to -1.
int gcc_name_list_index;
// Skips any run of consecutive block comments ("/" "*" ... "*" "/") that
// starts at the current token, leaving `tk` on the first token after them.
//
// Changes from the previous implementation:
//  - a zero token (end of input) inside a comment ends the scan instead of
//    looping forever on an unterminated comment;
//  - a '*' that is not followed by '/' is re-examined as a possible start of
//    the terminator, so a comment ending in "**/" is closed correctly.
void InlineAsm_SkipComment(void) {
    while (tk == '/' && lookahead() == '*') {
        tk = lex(); // the '*' that opens the comment
        tk = lex(); // first token of the comment body

        while (tk) {
            if (tk == '*') {
                tk = lex();
                if (tk == '/')
                    break;
                // Not a terminator: look at this token again, it may be another '*'.
                continue;
            }
            tk = lex();
        }

        if (!tk)
            return; // unterminated comment: stop at end of input

        tk = lex(); // step past the closing '/'
    }
}
// Parses a quoted operand constraint and returns the first character of the
// first identifier found inside it. Tokens between the opening quote and that
// identifier are skipped.
//
// Change from the previous implementation: the identifier search stops with
// an error when a zero token (end of input) is reached; it used to loop
// forever. In that case 0 is returned.
static char gcc_parse_attribute(void) {
    char ch;

    while (tk == TK_EOL)
        tk = lex();

    if (tk != '"')
        CError_Error(CErrorStr105);

    do {
        tk = lex();
        if (!tk) {
            CError_Error(CErrorStr105);
            return 0;
        }
    } while (tk != TK_IDENTIFIER);

    ch = tkidentifier->name[0];

    if ((tk = lex()) != '"')
        CError_Error(CErrorStr105);
    tk = lex();

    return ch;
}
static void gcc_parse_name(Boolean flag, char attribute) {
IALookupResult *nameentry;
ENode *expr;
Object *tempobj;
ENode *tempexpr;
Statement *stmt;
while (tk == TK_EOL)
tk = lex();
if (tk != '(')
CError_Error(CErrorStr114);
tk = lex();
if (flag) {
if (tk != TK_IDENTIFIER)
CError_Error(CErrorStr105);
InlineAsm_LookupSymbol(tkidentifier, &gcc_name_list[++gcc_name_list_index]);
if (gcc_name_list[gcc_name_list_index].object && gcc_name_list[gcc_name_list_index].object->u.var.info)
gcc_name_list[gcc_name_list_index].object->u.var.info->used = 1;
tk = lex();
} else {
in_assembler = 0;
cprep_nostring = 0;
nameentry = &gcc_name_list[++gcc_name_list_index];
expr = expression();
if (attribute == 'i' || attribute == 'I') {
if (!ENODE_IS(expr, EINTCONST))
CError_Error(CErrorStr144);
nameentry->value = CInt64_GetULong(&expr->data.intval);
nameentry->has_value = 1;
} else {
tempobj = create_temp_object(expr->rtype);
tempexpr = create_objectnode(tempobj);
if (tempobj->u.var.info)
tempobj->u.var.info->used = 1;
expr = makediadicnode(tempexpr, expr, EASS);
stmt = CFunc_InsertBeforeStatement(ST_EXPRESSION, first_ST_ASM);
first_ST_ASM = stmt->next;
if (!first_ST_ASM->next)
curstmt = first_ST_ASM;
stmt->expr = expr;
nameentry->name = tempobj->name;
nameentry->object = tempobj;
nameentry->label = NULL;
nameentry->type = NULL;
nameentry->has_value = 0;
}
}
cprep_nostring = 1;
in_assembler = 1;
if (tk != ')')
CError_Error(CErrorStr115);
tk = lex();
}
// Parses a comma-separated list of operands, each written as a quoted
// constraint followed by a parenthesised name or expression.
static void gcc_parse_expression(Boolean flag) {
    char attribute;

    for (;;) {
        attribute = gcc_parse_attribute();
        gcc_parse_name(flag, attribute);

        if (tk != ',')
            return;
        tk = lex();
    }
}
// Parses the ':'-introduced section handled in expression mode (flag 0).
// Does nothing if the section is absent or empty.
static void gcc_parse_input(void) {
    if (tk != ':')
        return;

    tk = lex();
    if (tk == ':' || tk == ')' || tk == '}')
        return;

    gcc_parse_expression(0);
}
// Parses the ':'-introduced section handled in identifier mode (flag 1).
// Does nothing if the section is absent or empty.
static void gcc_parse_output(void) {
    if (tk != ':')
        return;

    tk = lex();
    if (tk == ':' || tk == ')' || tk == '}')
        return;

    gcc_parse_expression(1);
}
// Skips the ':'-introduced list of quoted strings, separated by commas.
// The string contents are ignored.
//
// Change from the previous implementation: a zero token (end of input) inside
// a string ends the scan; an unclosed string used to loop forever.
static void gcc_parse_killed(void) {
    if (tk != ':')
        return;

    while (1) {
        // Each list item must start with an opening quote.
        if ((tk = lex()) != '"')
            return;

        tk = lex();
        while (1) {
            if (!tk)
                return; // unterminated string

            if (tk == '"') {
                if (lookahead() == ',') {
                    tk = lex(); // now on the ','; the outer loop reads the next item
                    break;
                }
                tk = lex();
                return;
            }
            tk = lex();
        }
    }
}
// Rewrites the placeholder operands of one ST_ASM statement using the entries
// recorded in gcc_name_list:
//   IAOpnd_6 (with unk4 == 2) becomes a register operand bound to the entry's object;
//   IAOpnd_7 becomes an immediate operand holding the entry's value.
//
// Changes from the previous implementation:
//  - the IAOpnd_7 index is range-checked before gcc_name_list is read (it was
//    used unchecked, unlike the IAOpnd_6 case);
//  - negative indices are rejected in both cases.
static void gcc_replace_arg_st_asm(Statement *stmt) {
    InlineAsm *ia;
    int i;
    IAOperand *op;
    short effect;
    short rclass;
    SInt32 num;

    if (!(ia = (InlineAsm *) stmt->expr))
        return;

    for (i = 0, op = ia->args; i < ia->argcount; i++, op++) {
        switch (op->type) {
            case IAOpnd_Imm:
            case IAOpnd_Reg:
            case IAOpnd_3:
            case IAOpnd_4:
            case IAOpnd_Lab:
                break;

            case IAOpnd_6:
                if (op->u.unk6.unk4 == 2) {
                    // Copy the fields out first: the operand union is about to be rewritten.
                    effect = op->u.unk6.effect;
                    rclass = op->u.unk6.rclass;
                    num = op->u.unk6.num;

                    op->type = IAOpnd_Reg;
                    op->u.reg.effect = effect;
                    op->u.reg.rclass = rclass;
                    op->u.reg.object = NULL;
                    if (num >= 0 && num <= gcc_name_list_index)
                        op->u.reg.object = gcc_name_list[num].object;
                    else
                        CError_Error(CErrorStr144);
                    op->u.reg.num = 0;
                } else {
                    CError_FATAL(365);
                }
                break;

            case IAOpnd_7:
                num = op->u.unk7.value;
                op->type = IAOpnd_Imm;
                if (num >= 0 && num <= gcc_name_list_index) {
                    op->u.imm.value = gcc_name_list[num].value;
                } else {
                    // Same diagnostic as the register case above.
                    CError_Error(CErrorStr144);
                    op->u.imm.value = 0;
                }
                break;
        }
    }
}
// Applies gcc_replace_arg_st_asm to every ST_ASM statement from first_ST_ASM onwards.
static void gcc_replace_arg(void) {
    Statement *cursor = first_ST_ASM;

    while (cursor) {
        if (cursor->type == ST_ASM)
            gcc_replace_arg_st_asm(cursor);
        cursor = cursor->next;
    }
}
// Parses the operand sections that follow the instruction string and then
// patches the placeholder operands of the collected ST_ASM statements.
void InlineAsm_gcc_parse(void) {
    // Start with an empty operand table and stop producing end-of-line tokens.
    gcc_name_list_index = -1;
    cprep_eoltokens = 0;

    if (tk == TK_EOL)
        tk = lex();

    // The three ':'-introduced sections, in order.
    gcc_parse_output();
    gcc_parse_input();
    gcc_parse_killed();

    gcc_replace_arg();
}

View File

@@ -0,0 +1,680 @@
#include "compiler/InlineAsm.h"
#include "compiler/InlineAsmPPC.h"
#include "compiler/GCCInlineAsm.h"
#include "compiler/CompilerTools.h"
#include "compiler/CError.h"
#include "compiler/CExpr.h"
#include "compiler/CFunc.h"
#include "compiler/CInit.h"
#include "compiler/CInline.h"
#include "compiler/CInt64.h"
#include "compiler/CMachine.h"
#include "compiler/COptimizer.h"
#include "compiler/CParser.h"
#include "compiler/CPrep.h"
#include "compiler/CPrepTokenizer.h"
#include "compiler/CScope.h"
#include "compiler/PCode.h"
#include "compiler/Registers.h"
#include "compiler/objects.h"
#include "compiler/scopes.h"
#include "compiler/types.h"
// When non-zero, PrimaryExpression resolves struct/class member paths with
// InlineAsm_StructArrayMemberOffset (which accepts '[' ... ']'); otherwise it
// uses InlineAsm_StructMemberOffset.
int allow_array_expressions = 1;
// When non-zero, InlineAsm_SyntaxError longjmps to `backtrack` instead of reporting.
int backtracking;
jmp_buf backtrack;
// Recovery point that AssemblerError jumps to.
jmp_buf InlineAsm_assemblererror;
// Incremented by InlineAsm_Assemble; used by MakeLocalLabel as the first
// number in generated label names.
static int ASMstmtnb;
// Abandons the current assembly statement by jumping to the recovery point
// stored in InlineAsm_assemblererror. Does not return.
void AssemblerError(void) {
    longjmp(InlineAsm_assemblererror, 1);
}
// Reports a syntax error. While backtracking, nothing is reported and control
// jumps to `backtrack` instead. If the current token is an end-of-line token
// or ';', CErrorStr112 is reported in place of `code`.
void InlineAsm_SyntaxError(short code) {
    if (backtracking)
        longjmp(backtrack, 1);

    CError_Error((tk == TK_EOL || tk == ';') ? CErrorStr112 : code);
}
// Returns the label on the `Labels` list whose name is `name` (compared by
// pointer), or NULL if there is none.
CLabel *InlineAsm_LookupLabel(HashNameNode *name) {
    CLabel *cursor = Labels;

    while (cursor && cursor->name != name)
        cursor = cursor->next;

    return cursor;
}
// Creates a new label named `name` and pushes it on the front of `Labels`.
CLabel *InlineAsm_DeclareLabel(HashNameNode *name) {
    CLabel *fresh;

    fresh = newlabel();
    fresh->name = name;

    fresh->next = Labels;
    Labels = fresh;

    return fresh;
}
// Defines the label `name` at the current position: appends an ST_LABEL
// statement and binds it to the label, declaring the label first if needed.
// Reports CErrorStr171 if the label already has a statement.
static void InlineAsm_DefineLabel(HashNameNode *name) {
    CLabel *target = InlineAsm_LookupLabel(name);
    Statement *marker;

    if (target == NULL)
        target = InlineAsm_DeclareLabel(name);
    else if (target->stmt)
        CError_Error(CErrorStr171, name->name);

    marker = CFunc_AppendStatement(ST_LABEL);
    marker->label = target;
    target->stmt = marker;
}
// Resolves `name` for the assembler and describes it in `result`.
// Labels are tried first, then each namespace from cscope_current outwards.
// Returns 1 when the name resolves to a label, enum constant, object or type
// (or a type tag, if allow_tag is set); returns 0 otherwise.
Boolean InlineAsm_LookupSymbolOrTag(HashNameNode *name, IALookupResult *result, Boolean allow_tag) {
    NameSpace *scope;
    NameSpaceObjectList *found;
    ObjBase *base;

    result->name = name;
    result->object = NULL;
    result->label = NULL;
    result->type = NULL;
    result->has_value = 0;

    result->label = InlineAsm_LookupLabel(name);
    if (result->label)
        return 1;

    for (scope = cscope_current; scope; scope = scope->parent) {
        found = CScope_FindName(scope, name);
        if (!found)
            continue;

        base = found->object;
        switch (base->otype) {
            case OT_ENUMCONST:
                result->has_value = 1;
                result->value = OBJ_ENUM_CONST(found->object)->val.lo;
                return 1;

            case OT_OBJECT:
                if (OBJECT(base)->datatype == DABSOLUTE) {
                    // Absolute objects are reported by address, not as objects.
                    result->has_value = 1;
                    result->value = OBJECT(base)->u.address;
                    return 1;
                }
                if (OBJECT(base)->datatype == DDATA && (OBJECT(base)->qual & Q_INLINE_DATA))
                    CInit_ExportConst(OBJECT(base));
                result->object = OBJECT(base);
                return 1;

            case OT_TYPE:
                result->type = OBJ_TYPE(base)->type;
                return 1;

            case OT_TYPETAG:
                if (allow_tag) {
                    result->type = OBJ_TYPE_TAG(base)->type;
                    return 1;
                }
                // A tag that is not allowed is treated as unusable.
                return 0;

            case OT_NAMESPACE:
            case OT_MEMBERVAR:
                return 0;

            default:
                CError_FATAL(245);
        }
    }

    return 0;
}
// Same lookup as InlineAsm_LookupSymbolOrTag, with type tags not accepted.
Boolean InlineAsm_LookupSymbol(HashNameNode *name, IALookupResult *result) {
    Boolean found = InlineAsm_LookupSymbolOrTag(name, result, 0);
    return found;
}
// Returns the member variable `name` found in the namespace of `tclass`, or
// NULL if the name is not found or its first match is not a member variable.
static ObjMemberVar *isclassmember(TypeClass *tclass, HashNameNode *name) {
    NameSpaceObjectList *match = CScope_FindName(tclass->nspace, name);

    if (match && match->object->otype == OT_MEMBERVAR)
        return OBJ_MEMBER_VAR(match->object);

    return NULL;
}
// Parses a chain of member names separated by '.', starting from `type`, and
// returns the sum of the member offsets. On entry `tk` is the '.' before the
// first member name; on return it is the first token after the chain.
SInt32 InlineAsm_StructMemberOffset(Type *type) {
    StructMember *field;
    ObjMemberVar *ivar;
    SInt32 total = 0;

    do {
        if (IS_TYPE_STRUCT(type)) {
            if ((tk = lex()) != TK_IDENTIFIER)
                InlineAsm_SyntaxError(CErrorStr107);

            field = ismember(TYPE_STRUCT(type), tkidentifier);
            if (field == NULL)
                CError_Error(CErrorStr150, tkidentifier->name);

            total += field->offset;
            type = field->type;
            tk = lex();
        } else if (IS_TYPE_CLASS(type)) {
            if ((tk = lex()) != TK_IDENTIFIER)
                InlineAsm_SyntaxError(CErrorStr107);

            ivar = isclassmember(TYPE_CLASS(type), tkidentifier);
            if (ivar == NULL)
                CError_Error(CErrorStr150, tkidentifier->name);

            total += ivar->offset;
            type = ivar->type;
            tk = lex();
        } else {
            // Member access on something that is neither a struct nor a class.
            CError_Error(CErrorStr149);
        }
    } while (tk == '.');

    return total;
}
// Parses a chain of '.member' and '[constant]' accessors starting from `type`
// and returns the accumulated byte offset.
// On entry `tk` is the first '.' or '['; any other entry token takes the array
// branch. On return `tk` is the first token after the chain.
// NOTE(review): after the CError_Error calls below, the code goes on to use
// `member`/`ivar` or to loop again; presumably CError_Error does not return
// while assembling - confirm.
SInt32 InlineAsm_StructArrayMemberOffset(Type *type) {
StructMember *member;
ObjMemberVar *ivar;
SInt32 offset = 0;
do {
if (tk == '.') {
// Member access: the type must be a struct or a class.
if (IS_TYPE_STRUCT(type)) {
tk = lex();
if (tk != TK_IDENTIFIER)
InlineAsm_SyntaxError(CErrorStr107);
member = ismember(TYPE_STRUCT(type), tkidentifier);
if (!member)
CError_Error(CErrorStr150, tkidentifier->name);
offset += member->offset;
type = member->type;
tk = lex();
} else if (IS_TYPE_CLASS(type)) {
tk = lex();
if (tk != TK_IDENTIFIER)
InlineAsm_SyntaxError(CErrorStr107);
ivar = isclassmember(TYPE_CLASS(type), tkidentifier);
if (!ivar)
CError_Error(CErrorStr150, tkidentifier->name);
offset += ivar->offset;
type = ivar->type;
tk = lex();
} else {
CError_Error(CErrorStr149);
}
} else {
// Subscript: the type must be an array; the index is a constant expression
// scaled by the element size.
if (IS_TYPE_ARRAY(type)) {
type = TPTR_TARGET(type);
tk = lex();
offset += type->size * InlineAsm_ConstantExpression();
if (tk != ']')
InlineAsm_SyntaxError(125);
tk = lex();
} else {
CError_Error(CErrorStr148);
}
}
} while (tk == '.' || tk == '[');
return offset;
}
// Parses a member name applied to `type` (a struct or class), followed by an
// optional chain of '.member' / '[constant]' accessors, and returns the
// accumulated byte offset. On entry `tk` is the token before the member name.
//
// Changes from the previous implementation:
//  - a type that is neither a struct nor a class is reported with CErrorStr149
//    (as the sibling offset functions do) instead of being treated as a class;
//  - if the "no such member" error returns to the caller, 0 is returned rather
//    than dereferencing a NULL member.
SInt32 InlineAsm_StructPointerMemberOffset(Type *type) {
    StructMember *member;
    ObjMemberVar *ivar;
    SInt32 offset;

    tk = lex();
    if (tk != TK_IDENTIFIER)
        InlineAsm_SyntaxError(107);

    if (IS_TYPE_STRUCT(type)) {
        member = ismember(TYPE_STRUCT(type), tkidentifier);
        if (!member) {
            CError_Error(CErrorStr150, tkidentifier->name);
            return 0;
        }
        offset = member->offset;
        type = member->type;
    } else if (IS_TYPE_CLASS(type)) {
        ivar = isclassmember(TYPE_CLASS(type), tkidentifier);
        if (!ivar) {
            CError_Error(CErrorStr150, tkidentifier->name);
            return 0;
        }
        offset = ivar->offset;
        type = ivar->type;
    } else {
        CError_Error(CErrorStr149);
        return 0;
    }

    tk = lex();
    if (tk == '.' || tk == '[')
        offset += InlineAsm_StructArrayMemberOffset(type);

    return offset;
}
// Evaluates `left op right` via CMach_CalcIntDiadic with the stsignedint type
// and returns the result obtained through CInt64_GetULong.
static SInt32 DiadicOperator(SInt32 left, short op, SInt32 right) {
    CInt64 lhs;
    CInt64 rhs;
    CInt64 result;

    CInt64_SetLong(&lhs, left);
    CInt64_SetLong(&rhs, right);

    result = CMach_CalcIntDiadic(TYPE(&stsignedint), lhs, op, rhs);
    return CInt64_GetULong(&result);
}
// Parses one primary of an assembler constant expression and returns its
// value: an integer literal, sizeof, a symbol with a constant value, a
// struct/class member offset, a parenthesised expression, or a unary
// '+', '-', '!' or '~' applied to another primary.
// Returns 0 after a syntax error, if the error call returns.
static SInt32 PrimaryExpression(void) {
    IALookupResult lookup;
    SInt32 result;

    switch (tk) {
        case TK_INTCONST:
            result = tkintconst.lo;
            tk = lex();
            return result;

        case TK_SIZEOF:
            return scansizeof();

        case '(':
            tk = lex();
            result = InlineAsm_ConstantExpression();
            if (tk != ')')
                InlineAsm_SyntaxError(115);
            tk = lex();
            return result;

        case '+':
            tk = lex();
            return PrimaryExpression();

        case '-':
            tk = lex();
            return -PrimaryExpression();

        case '!':
            tk = lex();
            return PrimaryExpression() == 0;

        case '~':
            tk = lex();
            return ~PrimaryExpression();

        case TK_IDENTIFIER:
            if (!InlineAsm_LookupSymbol(tkidentifier, &lookup)) {
                InlineAsm_SyntaxError(124);
                break;
            }

            // Enum constants and absolute objects carry a value directly.
            if (lookup.has_value) {
                tk = lex();
                return lookup.value;
            }

            // Otherwise only a struct/class type name followed by a member path is accepted.
            if (!(lookup.type && (IS_TYPE_STRUCT(lookup.type) || IS_TYPE_CLASS(lookup.type)))) {
                InlineAsm_SyntaxError(124);
                break;
            }

            tk = lex();
            if (tk != '.')
                InlineAsm_SyntaxError(120);

            if (allow_array_expressions)
                return InlineAsm_StructArrayMemberOffset(lookup.type);
            return InlineAsm_StructMemberOffset(lookup.type);

        default:
            InlineAsm_SyntaxError(120);
    }

    return 0;
}
// Parses the remainder of a binary expression whose left operand `value` has
// already been evaluated; on entry `tk` is a binary operator (GetPrec(tk) != 0).
// Operator precedence comes from GetPrec: an operator of equal or higher
// precedence than the one that follows it is applied immediately; otherwise
// the right-hand side is evaluated first by recursion.
static SInt32 ConstantExpressionTail(SInt32 value) {
SInt32 right;
short left_token;
short right_prec;
while (1) {
// left_token is the operator between `value` and the next primary.
left_token = tk;
tk = lex();
right = PrimaryExpression();
right_prec = GetPrec(tk);
// No operator follows: apply the pending operator and finish.
if (right_prec == 0)
return DiadicOperator(value, left_token, right);
if (GetPrec(left_token) >= right_prec) {
value = DiadicOperator(value, left_token, right);
} else {
// The following operator has higher precedence: evaluate that part first.
value = DiadicOperator(value, left_token, ConstantExpressionTail(right));
if (GetPrec(tk) == 0)
return value;
}
}
}
// Parses and evaluates an assembler constant expression starting at the
// current token.
SInt32 InlineAsm_ConstantExpression(void) {
    SInt32 value = PrimaryExpression();

    // A binary operator follows only if the current token has a precedence.
    if (GetPrec(tk) != 0)
        value = ConstantExpressionTail(value);

    return value;
}
// Builds the name used for a numeric local label: "@<ASMstmtnb>_<num>", and
// returns its hash-name node.
//
// Change from the previous implementation: the value from CInt64_GetULong is
// converted to int explicitly so that the argument type matches "%i".
// The text produced is unchanged for values that fit in an int.
HashNameNode *MakeLocalLabel(CInt64 num) {
    char buf[80];

    sprintf(buf, "@%i_%i", ASMstmtnb, (int) CInt64_GetULong(&num));
    return GetHashNameNodeExport(buf);
}
// Consumes a label definition at the start of a statement, if there is one.
// Recognised forms: "<integer>:", "@name" (the ':' is optional) and "name:".
// The else path reads tkidentifier, so the current token is expected to be
// either an integer constant or an identifier.
static void ScanOptionalLabel(void) {
    HashNameNode *name;
    short next;

    if (tk == TK_INTCONST) {
        // Numeric local label: only when immediately followed by ':'.
        if (lookahead() != ':')
            return;
        InlineAsm_DefineLabel(MakeLocalLabel(tkintconst));
        tk = lex();
        tk = lex();
        return;
    }

    if (tkidentifier->name[0] == '@') {
        // '@' labels are defined whether or not a ':' follows.
        InlineAsm_DefineLabel(tkidentifier);
        tk = lex();
        if (tk == ':')
            tk = lex();
        return;
    }

    // Plain identifier: it is a label only when followed by ':'.
    // tkidentifier is put back after the lookahead.
    name = tkidentifier;
    next = lookahead();
    tkidentifier = name;
    if (next == ':') {
        InlineAsm_DefineLabel(name);
        tk = lex();
        tk = lex();
    }
}
// Parses assembly statements until `endToken` or a zero token is reached.
// Each statement may be a '.' directive, an optional label followed by an
// instruction, or (once gcc mode has been entered by a leading '"') a quoted
// instruction string followed by operand sections.
// Errors raised through AssemblerError return control to one of the two setjmp
// points below.
static void ScanStatements(volatile short endToken, AssemblerType mode) {
// First recovery point: active only until the second setjmp below overwrites
// the jump buffer, i.e. during InlineAsm_Initialize. On error the rest of the
// line is skipped and the function returns without parsing anything.
if (setjmp(InlineAsm_assemblererror)) {
while (tk != TK_EOL && tk != endToken && tk != '}' && tk)
tk = lex();
if (tk == ';' || tk == TK_EOL)
tk = lex();
} else {
InlineAsm_Initialize(mode);
InlineAsm_gccmode = 0;
// Second recovery point: on error, skip to the end of the offending
// statement and continue with the statement loop below.
if (setjmp(InlineAsm_assemblererror)) {
while (tk != ';' && tk != TK_EOL && tk != endToken && tk != '}' && tk)
tk = lex();
if (tk == ';' || tk == TK_EOL)
tk = lex();
}
while (tk && tk != endToken) {
backtracking = 0;
sourceoffset = CPrep_GetFileOffsetInfo(&cparser_fileoffset);
if (tk == '"') {
if (InlineAsm_gccmode) {
tk = lex();
InlineAsm_gcc_parse();
} else {
// The first '"' switches to gcc mode and changes two compiler options;
// they are not restored in this function.
InlineAsm_gccmode = 1;
copts.cplusplus = 0;
copts.asmpoundcomment = 1;
tk = lex();
}
}
if (tk == '.') {
InlineAsm_ScanAssemblyDirective();
} else if (tk == TK_IDENTIFIER) {
ScanOptionalLabel();
if (tk == TK_IDENTIFIER)
InlineAsm_ScanAssemblyInstruction();
} else if (tk == TK_INTCONST) {
ScanOptionalLabel();
if (tk == TK_IDENTIFIER)
InlineAsm_ScanAssemblyInstruction();
}
// In gcc mode, a '"' after the instruction introduces the operand sections.
if (InlineAsm_gccmode && tk == '"') {
tk = lex();
InlineAsm_gcc_parse();
}
// A statement must end with ';', an end-of-line token, or the end token.
if (tk == ';' || tk == TK_EOL) {
CPrep_TokenStreamFlush();
tk = lex();
} else if (tk != endToken) {
if (endToken == ')')
CError_Error(CErrorStr115);
else
CError_Error(CErrorStr113);
}
}
}
}
void InlineAsm_ScanStatements(volatile short endToken) {
ScanStatements(endToken, AssemblerType_0);
}
void InlineAsm_ScanFunction(volatile short endToken) {
ScanStatements(endToken, AssemblerType_1);
}
// Parses an assembly block embedded in a function. The closing token is ')'
// when the block opens with '(' and '}' otherwise. An ST_NOP statement is
// appended first and remembered in first_ST_ASM. The two compiler options that
// the scanner may change are saved and restored.
void InlineAsm_Assemble(void) {
    short closer;
    char saved_poundcomment;
    char saved_cplusplus;

    if (tk == '(')
        closer = ')';
    else
        closer = '}';
    saved_poundcomment = copts.asmpoundcomment;
    saved_cplusplus = copts.cplusplus;

    cprep_nostring = 1;
    CFunc_AppendStatement(ST_NOP);
    first_ST_ASM = curstmt;
    ASMstmtnb++;

    // Enter assembler mode for the duration of the scan.
    cprep_eoltokens = 1;
    in_assembler = 1;
    tk = lex();
    InlineAsm_ScanStatements(closer);
    in_assembler = 0;
    cprep_eoltokens = 0;
    cprep_nostring = 0;

    copts.asmpoundcomment = saved_poundcomment;
    copts.cplusplus = saved_cplusplus;
}
// Produces a position-independent copy of the InlineAsm record attached to
// `stmt`. In the copy, object pointers are replaced by local IDs and label
// pointers by statement numbers relative to `first`. The copy is allocated
// with galloc and returned through `output`, its size through `outsize`.
//
// Change from the previous implementation: integer IDs are converted to
// pointer type via intptr_t rather than cast directly, which avoids an
// integer-to-pointer cast between types of different width.
void InlineAsm_PackAsmStatement(Statement *stmt, Statement *first, void **output, SInt32 *outsize) {
    InlineAsm *src;
    InlineAsm *dest;
    IAOperand *op;
    SInt32 i;
    SInt32 size;

    src = (InlineAsm *) stmt->expr;
    size = sizeof(InlineAsm) + sizeof(IAOperand) * src->argcount;
    dest = galloc(size);
    memcpy(dest, src, size);

    for (i = 0, op = dest->args; i < dest->argcount; i++, op++) {
        switch (op->type) {
            case IAOpnd_0:
                break;
            case IAOpnd_Reg:
            case IAOpnd_4:
                op->u.reg.object = (Object *) (intptr_t) CInline_GetLocalID(op->u.reg.object);
                break;
            case IAOpnd_Lab:
                op->u.lab.label = (CLabel *) (intptr_t) CInline_GetStatementNumber(first, op->u.lab.label->stmt);
                break;
            case IAOpnd_LabDiff:
                op->u.labdiff.label1 = (CLabel *) (intptr_t) CInline_GetStatementNumber(first, op->u.labdiff.label1->stmt);
                op->u.labdiff.label2 = (CLabel *) (intptr_t) CInline_GetStatementNumber(first, op->u.labdiff.label2->stmt);
                break;
        }
    }

    *output = dest;
    *outsize = size;
}
// Inverse of InlineAsm_PackAsmStatement: copies the packed record `data`
// (`size` bytes), turns local IDs back into objects (via CInline_GetLocalObj)
// and statement numbers back into labels (via `labelArray`), and attaches the
// result to `stmt`.
//
// Change from the previous implementation: the IDs stored in pointer fields
// are read back through intptr_t before being narrowed, which avoids a direct
// pointer-to-narrower-integer cast. The resulting values are the same.
void InlineAsm_UnpackAsmStatement(Statement *stmt, CLabel **labelArray, Boolean flag, void *data, SInt32 size) {
    InlineAsm *ia;
    IAOperand *op;
    SInt32 i;

    ia = galloc(size);
    memcpy(ia, data, size);

    for (i = 0, op = ia->args; i < ia->argcount; i++, op++) {
        switch (op->type) {
            case IAOpnd_0:
                break;
            case IAOpnd_Reg:
            case IAOpnd_4:
                op->u.reg.object = CInline_GetLocalObj((SInt32) (intptr_t) op->u.reg.object, flag);
                break;
            case IAOpnd_Lab:
                op->u.lab.label = labelArray[(SInt16) (intptr_t) op->u.lab.label];
                break;
            case IAOpnd_LabDiff:
                op->u.labdiff.label1 = labelArray[(SInt16) (intptr_t) op->u.labdiff.label1];
                op->u.labdiff.label2 = labelArray[(SInt16) (intptr_t) op->u.labdiff.label2];
                break;
        }
    }

    stmt->expr = (ENode *) ia;
}
// Records variable usage for the operands of one asm statement: register
// operands bound to an object are passed to SetVarUsage with 0, IAOpnd_4
// operands with 1.
void InlineAsm_CheckLocalUsage(Statement *stmt) {
    InlineAsm *ia = (InlineAsm *) stmt->expr;
    SInt32 index;

    for (index = 0; index < ia->argcount; index++) {
        IAOperand *operand = &ia->args[index];

        if (operand->type == IAOpnd_Reg) {
            if (operand->u.reg.object)
                SetVarUsage(operand->u.reg.object, 0);
        } else if (operand->type == IAOpnd_4) {
            SetVarUsage(operand->u.obj.obj, 1);
        }
    }
}
// Returns the label of the first label operand of the statement, or label1 of
// the first label-difference operand, whichever operand comes first.
// Returns NULL if there is neither.
CLabel *InlineAsm_GetReferencedLabel(Statement *stmt) {
    InlineAsm *ia = (InlineAsm *) stmt->expr;
    SInt32 index;

    for (index = 0; index < ia->argcount; index++) {
        IAOperand *operand = &ia->args[index];

        switch (operand->type) {
            case IAOpnd_Lab:
                return operand->u.lab.label;
            case IAOpnd_LabDiff:
                return operand->u.labdiff.label1;
            default:
                break;
        }
    }

    return NULL;
}
// Returns label2 of the first label-difference operand of the statement, or
// NULL if there is none.
CLabel *InlineAsm_GetReferencedLabel2(Statement *stmt) {
    InlineAsm *ia = (InlineAsm *) stmt->expr;
    SInt32 index;

    for (index = 0; index < ia->argcount; index++) {
        IAOperand *operand = &ia->args[index];

        if (operand->type != IAOpnd_LabDiff)
            continue;
        return operand->u.labdiff.label2;
    }

    return NULL;
}
// Finds the `index`-th (0-based) IAOpnd_3 operand of `ia`. On success returns
// its object and stores in `*offset` the byte distance from the start of `ia`
// to the operand's object pointer field. Returns NULL (leaving `*offset`
// untouched) if there are not that many IAOpnd_3 operands.
Object *InlineAsm_GetObjectOffset(InlineAsm *ia, SInt32 index, SInt32 *offset) {
    IAOperand *operand = ia->args;
    SInt32 seen = 0;
    SInt32 pos;

    for (pos = 0; pos < ia->argcount; pos++, operand++) {
        if (operand->type != IAOpnd_3)
            continue;
        if (seen++ != index)
            continue;

        *offset = ((intptr_t) &operand->u.obj.obj) - ((intptr_t) ia);
        return operand->u.obj.obj;
    }

    return NULL;
}
// Formats an asm statement as text for listings: the quoted mnemonic followed
// by one " kind(...)" item per operand. Returns a pointer to a static buffer
// that is overwritten by the next call.
//
// Changes from the previous implementation:
//  - the per-operand buffer is emptied before each operand, so an operand type
//    without a case no longer appends uninitialised stack contents;
//  - all formatting and appending is bounded by the buffer sizes (over-long
//    output is truncated instead of overflowing);
//  - integer arguments are cast to the type their format specifier expects.
char *InlineAsm_DumpStatement(Statement *stmt) {
    static char buffer[1024];
    InlineAsm *ia;
    IAOperand *arg;
    int i;
    char ch;
    SInt32 offset;

    ia = (InlineAsm *) stmt->expr;
    snprintf(buffer, sizeof(buffer), "\"%s\"", InlineAsm_GetMnemonic(ia));

    for (i = 0, arg = ia->args; i < ia->argcount; i++, arg++) {
        char argbuf[1024];

        argbuf[0] = 0;
        switch (arg->type) {
            case IAOpnd_Imm:
                snprintf(argbuf, sizeof(argbuf), " imm(%ld)", (long) arg->u.imm.value);
                break;

            case IAOpnd_Reg:
                // Prefix: '+' read and write, '=' write only, ' ' read only, '0' neither.
                ch = ' ';
                if (arg->u.reg.effect & EffectWrite) {
                    if (arg->u.reg.effect & EffectRead)
                        ch = '+';
                    else
                        ch = '=';
                } else {
                    if (!(arg->u.reg.effect & EffectRead))
                        ch = '0';
                }
                if (arg->u.reg.object) {
                    snprintf(argbuf, sizeof(argbuf),
                             "%creg(%s)",
                             ch,
                             arg->u.reg.object->name->name);
                } else {
                    snprintf(argbuf, sizeof(argbuf),
                             "%creg(%s%d)",
                             ch,
                             register_class_name[arg->u.reg.rclass],
                             (int) arg->u.reg.num);
                }
                break;

            case IAOpnd_3:
            case IAOpnd_4:
                if (arg->u.obj.offset > 0)
                    snprintf(argbuf, sizeof(argbuf), " obj(%s+%ld)", arg->u.obj.obj->name->name, (long) arg->u.obj.offset);
                else if (arg->u.obj.offset < 0)
                    snprintf(argbuf, sizeof(argbuf), " obj(%s-%ld)", arg->u.obj.obj->name->name, (long) -arg->u.obj.offset);
                else
                    snprintf(argbuf, sizeof(argbuf), " obj(%s)", arg->u.obj.obj->name->name);
                break;

            case IAOpnd_Lab:
                snprintf(argbuf, sizeof(argbuf), " lab(%s)", arg->u.lab.label->uniquename->name);
                break;

            case IAOpnd_LabDiff:
                offset = !arg->negated ? 0 : arg->u.labdiff.offset;
                snprintf(argbuf, sizeof(argbuf),
                         " labdiff(%s-%s%c%d)",
                         arg->u.labdiff.label1->uniquename->name,
                         arg->u.labdiff.label2->uniquename->name,
                         (arg->negated == 1) ? '-' : '+',
                         (int) offset);
                break;

            default:
                // Other operand types add nothing to the listing.
                break;
        }

        strncat(buffer, argbuf, sizeof(buffer) - strlen(buffer) - 1);
    }

    return buffer;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,70 @@
#include "compiler/PPCError.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/InlineAsm.h"
#include "cos.h"
// Loads the message text for PPC error `code` into `str`.
// Valid codes are 100 up to (not including) PPCErrorStrMAX; they map to
// strings 1.. of string list 10001.
static void PPCError_GetErrorString(char *str, short code) {
    short scode = (short) code;

    CError_ASSERT(40, scode >= 100 && scode < PPCErrorStrMAX);
    COS_GetString(str, 10001, scode - 99);
}
// Looks up the format string for PPC error `code` and passes it, with the
// caller's arguments and flags, to CError_ErrorMessageVA. The code given to
// CError_ErrorMessageVA is `code + 10001`.
static void PPCError_VAErrorMessage(int code, va_list list, Boolean flag1, Boolean flag2) {
    char message[256];

    PPCError_GetErrorString(message, code);
    CError_ErrorMessageVA(code + 10001, message, list, flag1, flag2);
}
// Reports PPC error `code` with printf-style arguments.
// If a try chain is active, nothing is reported and control jumps to it.
// While assembling, control leaves through AssemblerError after the report.
void PPCError_Error(int code, ...) {
    va_list args;

    if (trychain)
        longjmp(trychain->jmpbuf, 1);

    va_start(args, code);
    PPCError_VAErrorMessage(code, args, 0, 0);
    va_end(args);

    if (in_assembler)
        AssemblerError();
}
// Reports PPC warning `code` with printf-style arguments.
// Nothing is reported while a try chain is active.
void PPCError_Warning(int code, ...) {
    va_list args;

    if (trychain)
        return;

    va_start(args, code);
    PPCError_VAErrorMessage(code, args, 0, 1);
    va_end(args);
}
// Reports a message built from the caller's printf-style `format`, under the
// fixed code 10213. Nothing is reported while a try chain is active.
void PPCError_Message(char *format, ...) {
    va_list args;

    if (trychain)
        return;

    va_start(args, format);
    CError_ErrorMessageVA(10213, format, args, 0, 1);
    va_end(args);
}
// Reports PPC error `code` with printf-style arguments and then transfers
// control away: to the active try chain (without reporting), to the assembler
// recovery point while assembling, or otherwise to `errorreturn`.
// This function does not return to its caller.
// NOTE(review): `code` is declared `short`, a type changed by the default
// argument promotions; the C standard leaves va_start undefined for such a
// last named parameter. Fixing it means changing the parameter to `int` here
// and in the declaration.
void PPCError_ErrorTerm(short code, ...) {
va_list list;
if (trychain)
longjmp(trychain->jmpbuf, 1);
va_start(list, code);
PPCError_VAErrorMessage(code, list, 1, 0);
va_end(list);
if (in_assembler)
AssemblerError();
longjmp(errorreturn, 1);
}

View File

@@ -0,0 +1,268 @@
#include "compiler/Coloring.h"
#include "compiler/CFunc.h"
#include "compiler/CompilerTools.h"
#include "compiler/InterferenceGraph.h"
#include "compiler/PCode.h"
#include "compiler/PPCError.h"
#include "compiler/Registers.h"
#include "compiler/RegisterInfo.h"
#include "compiler/SpillCode.h"
#include "compiler/StackFrame.h"
#include "compiler/objects.h"
// Register class currently being coloured; set by colorinstructions for each class in turn.
RegClass coloring_class;
// used_virtual_registers[] of the current class, recorded by colorinstructions
// before its colouring loop starts.
static short used_regs_before_coloring;
static void markspecialregisters(RegClass rclass) {
ObjectList *list;
Object *object;
VarInfo *vi;
UInt32 i;
for (i = 0; i < n_real_registers[rclass]; i++)
interferencegraph[i]->x14 = i;
for (list = arguments; list; list = list->next) {
object = list->object;
vi = Registers_GetVarInfo(object);
if ((vi->flags & VarInfoFlag2) && vi->rclass == rclass) {
interferencegraph[vi->reg]->spillTemporary = object;
if (vi->flags & VarInfoFlag4) {
interferencegraph[vi->reg]->flags |= fPairLow;
interferencegraph[vi->regHi]->flags |= fPairHigh;
interferencegraph[vi->regHi]->spillTemporary = object;
}
}
}
for (list = locals; list; list = list->next) {
object = list->object;
vi = Registers_GetVarInfo(object);
if ((vi->flags & VarInfoFlag2) && vi->rclass == rclass) {
interferencegraph[vi->reg]->spillTemporary = object;
if (vi->flags & VarInfoFlag4) {
interferencegraph[vi->reg]->flags |= fPairLow;
interferencegraph[vi->regHi]->flags |= fPairHigh;
interferencegraph[vi->regHi]->spillTemporary = object;
}
}
}
}
// Orders the virtual-register nodes of the current class for colouring.
// Nodes whose x12 count is below the number of available registers are pushed
// on a stack (each push decrements x12 of the nodes in its array). When only
// nodes with higher counts remain, the one with the lowest spillCost / x12
// score is pushed and the scan is repeated.
// Returns the head of the pushed list (last pushed first), linked by `next`.
static IGNode *simplifygraph(void) {
int availableRegs;
IGNode *spilledNodes;
IGNode *pushedNodes;
IGNode *best;
IGNode *node;
UInt32 i;
UInt32 j;
int flag;
float bestScore;
float score;
availableRegs = available_registers(coloring_class);
pushedNodes = NULL;
// Repeat the scan until a full pass pushes nothing; spilledNodes then holds
// the nodes that could not be pushed in the last pass.
do {
spilledNodes = NULL;
flag = 0;
for (i = n_real_registers[coloring_class]; i < used_virtual_registers[coloring_class]; i++) {
node = interferencegraph[i];
if (!(node->flags & (fPushed | fCoalesced))) {
if (node->x12 < availableRegs) {
for (j = 0; j < node->arraySize; j++)
interferencegraph[node->array[j]]->x12--;
node->flags |= fPushed;
node->next = pushedNodes;
pushedNodes = node;
flag = 1;
} else {
node->next = spilledNodes;
spilledNodes = node;
}
}
}
} while (flag);
if (spilledNodes)
estimatespillcosts();
while (spilledNodes) {
// Pick the remaining node with the lowest score. Nodes with
// x10 >= used_regs_before_coloring score FLT_MAX, so they are chosen last.
best = spilledNodes;
bestScore = (spilledNodes->x10 >= used_regs_before_coloring) ? FLT_MAX : ((float) spilledNodes->spillCost / (float) spilledNodes->x12);
for (node = spilledNodes->next; node; node = node->next) {
score = (node->x10 >= used_regs_before_coloring) ? FLT_MAX : ((float) node->spillCost / (float) node->x12);
if (score < bestScore) {
best = node;
bestScore = score;
}
}
for (i = 0; i < best->arraySize; i++)
interferencegraph[best->array[i]]->x12--;
best->flags |= fPushed;
best->next = pushedNodes;
pushedNodes = best;
// Same scan as above: pushing `best` may have brought other nodes below the limit.
do {
spilledNodes = NULL;
flag = 0;
for (i = n_real_registers[coloring_class]; i < used_virtual_registers[coloring_class]; i++) {
node = interferencegraph[i];
if (!(node->flags & (fPushed | fCoalesced))) {
if (node->x12 < availableRegs) {
for (j = 0; j < node->arraySize; j++)
interferencegraph[node->array[j]]->x12--;
node->flags |= fPushed;
node->next = pushedNodes;
pushedNodes = node;
flag = 1;
} else {
node->next = spilledNodes;
spilledNodes = node;
}
}
}
} while (flag);
}
return pushedNodes;
}
// Assigns a real register to every node on the list `node` (linked by `next`),
// in list order. A node receives the lowest-numbered register from the current
// candidate mask that none of the nodes in its array already uses. If no
// candidate is left, a non-volatile register is obtained and added to the
// mask; if none can be obtained the node is flagged fSpilled.
// Returns 1 if every node was coloured, 0 if at least one was spilled.
//
// Change from the previous implementation: register bit masks are built with
// unsigned shifts. `1 << reg` on a signed int is undefined behaviour when reg
// is 31.
static int colorgraph(IGNode *node) {
    UInt32 volatileRegs;
    int result;
    IGNode *otherNode;
    int reg;
    UInt32 workingMask;
    int i;
    short *array;

    result = 1;
    reset_nonvolatile_registers(coloring_class);
    volatileRegs = volatile_registers(coloring_class);

    while (node) {
        // Start from all candidate registers and remove those taken by neighbours.
        workingMask = volatileRegs;
        for (array = node->array, i = 0; i < node->arraySize; i++) {
            otherNode = interferencegraph[*(array++)];
            reg = otherNode->x14;
            if (reg != -1 && reg < n_real_registers[coloring_class])
                workingMask &= ~((UInt32) 1 << reg);
        }

        if (workingMask) {
            for (i = 0; i < n_real_registers[coloring_class]; i++) {
                if (workingMask & ((UInt32) 1 << i)) {
                    node->x14 = i;
                    break;
                }
            }
        } else {
            reg = obtain_nonvolatile_register(coloring_class);
            if (reg != -1) {
                // The new register becomes a candidate for all later nodes too.
                node->x14 = reg;
                volatileRegs |= (UInt32) 1 << reg;
            } else {
                node->flags |= fSpilled;
                result = 0;
            }
        }

        node = node->next;
    }

    return result;
}
// Applies a finished colouring of the current class: every register operand
// of that class is replaced by the register stored in its graph node (x14),
// moves that end up copying a register to itself are deleted, and the VarInfo
// of objects attached to virtual-register nodes is updated with the final
// register numbers.
static void rewritepcode(void) {
PCodeBlock *block;
PCode *instr;
PCodeArg *op;
UInt32 i;
IGNode *node;
int reg;
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (PC_OP_IS_ANY_REGISTER(op, coloring_class))
op->data.reg.reg = interferencegraph[op->data.reg.reg]->x14;
op++;
}
// A move whose two register operands are now the same register is redundant.
// NOTE(review): instr->nextPCode is read after deletepcode(instr); presumably
// deletepcode leaves that link intact - confirm.
if (
(instr->flags & fIsMove) &&
(instr->args[1].arg == coloring_class) &&
instr->args[1].data.reg.reg == instr->args[0].data.reg.reg
)
deletepcode(instr);
}
}
for (i = n_real_registers[coloring_class]; i < used_virtual_registers[coloring_class]; i++) {
node = interferencegraph[i];
if (node->spillTemporary && !(node->flags & fSpilled)) {
if (node->flags & fCoalesced) {
// Follow x14 from node to node until it names a real register or is negative.
reg = node->x14;
while (reg >= n_real_registers[coloring_class]) {
reg = interferencegraph[reg]->x14;
if (reg < 0)
break;
}
node->x14 = reg;
}
// Store the final register in the object's VarInfo: regHi for the high
// half of a pair, reg otherwise.
if (node->flags & fPairHigh) {
reg = node->x14;
Registers_GetVarInfo(node->spillTemporary)->regHi = reg;
} else {
reg = node->x14;
Registers_GetVarInfo(node->spillTemporary)->reg = reg;
}
}
}
}
// Runs register colouring for every register class of `proc`.
// For a class that uses virtual registers, the build / simplify / colour cycle
// is repeated, inserting spill code after each failed attempt, until colouring
// succeeds; the PCode is then rewritten with the chosen registers.
// If a class has no available registers, PPCErrorStr102 is reported and the
// function returns immediately (without reaching the `coloring = 0` reset).
void colorinstructions(Object *proc) {
    RegClass rclass;
    int retry;

    for (rclass = 0; rclass < RegClassMax; rclass++) {
        coloring_class = rclass;

        if (rclass == RegClass_GPR)
            check_dynamic_aligned_frame();

        if (used_virtual_registers[rclass] > n_real_registers[rclass]) {
            save_before_coloring_nonvolatile_registers(rclass);
            used_regs_before_coloring = used_virtual_registers[rclass];

            if (!available_registers(rclass)) {
                PPCError_Error(PPCErrorStr102, register_class_name[rclass]);
                return;
            }

            for (retry = 1; retry && used_virtual_registers[rclass] > n_real_registers[rclass]; ) {
                buildinterferencegraph(proc);
                markspecialregisters(rclass);

                // A failed colouring means something was spilled: add spill code and retry.
                retry = !colorgraph(simplifygraph());
                if (retry)
                    insertspillcode();
                else
                    rewritepcode();

                freeoheap();
            }
        }

        used_virtual_registers[rclass] = n_real_registers[rclass];
    }

    coloring = 0;
}

View File

@@ -0,0 +1,364 @@
#include "compiler/InterferenceGraph.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/BitVectors.h"
#include "compiler/Coloring.h"
#include "compiler/LiveInfo.h"
#include "compiler/PCode.h"
#include "compiler/PCodeListing.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/Registers.h"
#include "compiler/RegisterInfo.h"
#include "compiler/CompilerTools.h"
// Graph nodes, indexed by register number.
IGNode **interferencegraph;
// Bit matrix of interfering register pairs; allocated by buildinterferencematrix
// and accessed through makeinterfere / interferes.
static UInt32 *interferencematrix;
Boolean coalesced_nregisters;
// Per-register link followed by coalesced_path; a register that maps to itself ends the chain.
static SInt16 *coalesced;
// Records that registers `a` and `b` interfere. The pair is stored once, at
// bit (hi * hi) / 2 + lo of the matrix, where hi is the larger register number.
// Does nothing when a == b.
static void makeinterfere(UInt32 a, UInt32 b) {
    UInt32 lo;
    UInt32 hi;

    if (a == b)
        return;

    if (a < b) {
        lo = a;
        hi = b;
    } else {
        lo = b;
        hi = a;
    }

    bitvectorsetbit(((hi * hi) / 2) + lo, interferencematrix);
}
// Returns non-zero if registers `a` and `b` were recorded as interfering.
// A register never interferes with itself.
int interferes(UInt32 a, UInt32 b) {
    UInt32 lo;
    UInt32 hi;

    if (a == b)
        return 0;

    if (a < b) {
        lo = a;
        hi = b;
    } else {
        lo = b;
        hi = a;
    }

    return bitvectorgetbit(((hi * hi) / 2) + lo, interferencematrix) > 0;
}
// Builds the interference bit matrix for the current register class.
// Each basic block is walked backwards from its live-out set: a register
// written by an instruction interferes with every register live after it
// (except the source of a move), and registers read become live.
// For GPRs, further interferences are added for certain memory and cache
// instructions and for calls.
static void buildinterferencematrix(void) {
PCodeBlock *block; // r30
PCode *instr; // r29
PCodeArg *op;
UInt32 *vec; // r28
UInt32 i;
UInt32 j;
UInt32 regs = used_virtual_registers[coloring_class];
// The matrix holds (regs * regs) / 2 bits, rounded up to whole 32-bit words.
interferencematrix = oalloc(4 * ((((regs * regs) / 2) + 31) >> 5));
bitvectorinitialize(interferencematrix, (regs * regs) / 2, 0);
// Registers 0..31 all interfere with each other.
// NOTE(review): the bound is a literal 32 rather than
// n_real_registers[coloring_class]; if a class has fewer than 32 real
// registers this marks virtual registers as interfering, and can index past
// the matrix when regs < 32 - confirm.
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
if (i != j)
makeinterfere(i, j);
vec = oalloc(4 * ((regs + 31) >> 5));
for (block = pcbasicblocks; block; block = block->nextBlock) {
// vec holds the registers live after the instruction being examined.
bitvectorcopy(vec, liveinfo[block->blockIndex].out, regs);
for (instr = block->lastPCode; instr; instr = instr->prevPCode) {
for (op = instr->args, i = instr->argCount; i--; op++) {
if (PC_OP_IS_WRITE_ANY_REGISTER(op, coloring_class)) {
int reg = op->data.reg.reg;
bitvectorclearbit(reg, vec);
for (j = 0; j < regs; j++) {
if (bitvectorgetbit(j, vec)) {
// The source of a move is not made to interfere with the written register.
if (
(instr->flags & fIsMove) &&
PC_OP_IS_ANY_REGISTER(&instr->args[0], coloring_class) &&
instr->args[1].data.reg.reg == j
)
continue;
makeinterfere(reg, j);
}
}
}
}
for (op = instr->args, i = instr->argCount; i--; op++) {
if (PC_OP_IS_READ_ANY_REGISTER(op, coloring_class)) {
int reg = op->data.reg.reg;
// Effect4 is set on a read of a register that is not live after this instruction.
if (bitvectorgetbit(op->data.reg.reg, vec) == 0)
op->data.reg.effect |= Effect4;
bitvectorsetbit(reg, vec);
}
}
if (coloring_class == RegClass_GPR) {
if (PCODE_FLAG_SET_F(instr) & (fIsRead | fIsWrite | fPCodeFlag400000)) {
// A virtual register used as args[1] here is made to interfere with register 0.
if (instr->args[1].data.reg.reg >= n_real_registers[coloring_class])
makeinterfere(0, instr->args[1].data.reg.reg);
if (PCODE_FLAG_SET_F(instr) & fUpdatesPtr)
makeinterfere(instr->args[0].data.reg.reg, instr->args[1].data.reg.reg);
} else {
switch (instr->op) {
case PC_DCBF:
case PC_DCBST:
case PC_DCBT:
case PC_DCBTST:
case PC_DCBZ:
case PC_DCBI:
case PC_ICBI:
case PC_DCCCI:
case PC_ICBT:
case PC_ICCCI:
case PC_ICREAD:
case PC_DCBA:
case PC_DST:
case PC_DSTT:
case PC_DSTST:
case PC_DSTSTT:
// Same rule for these instructions, applied to args[0].
if (instr->args[0].data.reg.reg >= n_real_registers[coloring_class])
makeinterfere(0, instr->args[0].data.reg.reg);
break;
}
}
}
if (coloring_class == RegClass_GPR && (instr->flags & fIsCall)) {
// Starting at index branch_count_volatiles() plus the position of the first
// written register operand, every GPR operand of the call is made to
// interfere with all scratch registers of the class.
i = branch_count_volatiles();
op = instr->args;
CError_ASSERT(219, instr->argCount != 0);
while (op->kind != PCOp_REGISTER || !(op->data.reg.effect & EffectWrite)) {
i++;
op++;
CError_ASSERT(226, i <= instr->argCount);
}
for (op = instr->args + i; i < instr->argCount; i++, op++) {
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
for (j = 0; j < n_scratch_registers[coloring_class]; j++)
makeinterfere(op->data.reg.reg, scratch_registers[coloring_class][j]);
}
}
}
}
}
}
// Follows the coalesced[] chain starting at id and returns the register at the
// end of it, i.e. the first entry that maps to itself.
static short coalesced_path(short id) {
    short next;

    for (;;) {
        next = coalesced[id];
        if (next == id)
            return id;
        id = next;
    }
}
// Merges the source and destination of register-to-register moves when the two
// do not interfere, deletes the moves that become redundant, and finally
// rewrites every operand to refer to the surviving register.
static void coalescenodes(void) {
PCodeArg *op;
UInt32 regs;
PCodeBlock *block;
PCode *instr;
UInt32 i;
short path1;
short path2;
short node1;
short node2;
regs = used_virtual_registers[coloring_class];
coalesced = oalloc(sizeof(SInt16) * regs);
// Every register starts out as its own representative.
for (i = 0; i < regs; i++)
coalesced[i] = i;
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
// Only moves without side effects are candidates.
if ((instr->flags & fIsMove) && !(instr->flags & fSideEffects)) {
if (PCODE_FLAG_SET_F(instr) & fRecordBit) {
CError_FATAL(309);
continue;
}
// A third operand is only tolerated when it is a placeholder.
if (instr->argCount > 2) {
if (instr->argCount != 3 || instr->args[2].kind != PCOp_PLACEHOLDEROPERAND) {
CError_FATAL(316);
continue;
}
}
if (PC_OP_IS_ANY_REGISTER(&instr->args[0], coloring_class)) {
path1 = coalesced_path(instr->args[0].data.reg.reg);
path2 = coalesced_path(instr->args[1].data.reg.reg);
// Both sides already resolve to the same register: the move does nothing.
// NOTE(review): the loop then reads instr->nextPCode from the deleted
// instruction; this assumes deletepcode() leaves that link intact.
if (path1 == path2) {
deletepcode(instr);
continue;
}
if (!interferes(path1, path2)) {
// When both are virtual registers, both must lie within
// [first_fe_temporary_register, last_temporary_register] to be merged.
if (path1 >= n_real_registers[coloring_class] && path2 >= n_real_registers[coloring_class]) {
if (path1 < first_fe_temporary_register[coloring_class])
continue;
if (path1 > last_temporary_register[coloring_class])
continue;
if (path2 < first_fe_temporary_register[coloring_class])
continue;
if (path2 > last_temporary_register[coloring_class])
continue;
}
// The higher-numbered register (node2) is folded into the lower one (node1).
node1 = (path2 < path1) ? path2 : path1;
node2 = (path2 > path1) ? path2 : path1;
// _CALLER_SP_ is never merged into another register.
if (coloring_class == RegClass_GPR && node2 == _CALLER_SP_)
continue;
coalesced[node2] = node1;
// node1 inherits everything node2 interfered with.
for (i = 0; i < regs; i++) {
if (interferes(node2, i))
makeinterfere(node1, i);
}
deletepcode(instr);
}
}
}
}
}
// Rewrite all operands so that merged registers use their representative.
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (PC_OP_IS_ANY_REGISTER(op, coloring_class) && op->data.reg.reg != coalesced[op->data.reg.reg])
op->data.reg.reg = coalesced_path(op->data.reg.reg);
op++;
}
}
}
}
// Converts the interference bit matrix into one IGNode per register, each
// carrying the list of register numbers it interferes with.
//
// The node is allocated as sizeof(IGNode) plus room for (counter - 1) extra
// neighbour slots. counter is unsigned, so for a register with no neighbours
// the original `counter - 1` wrapped to 0xFFFFFFFF, which asks for several
// gigabytes where size_t is 64 bits wide. The number of extra slots is
// therefore clamped at zero.
// NOTE(review): this assumes IGNode ends in a one-element `array` member, as the
// original size expression implies -- confirm against the struct definition.
static void buildadjacencyvectors(void) {
    IGNode *node;
    UInt32 regs;
    UInt32 i;
    UInt32 j;
    UInt32 counter;
    UInt32 extra;
    UInt32 nodeSize;
    short *array;
    short *dest;
    short *src;

    regs = used_virtual_registers[coloring_class];
    interferencegraph = oalloc(sizeof(IGNode *) * regs);
    // Scratch list, large enough for the worst case of interfering with everything.
    array = oalloc(sizeof(short) * regs);

    for (i = 0; i < regs; i++) {
        // Collect the neighbours of register i.
        counter = 0;
        for (j = 0; j < regs; j++) {
            if (interferes(i, j))
                array[counter++] = j;
        }

        // One slot is part of IGNode itself; never let the extra count underflow.
        extra = (counter > 0) ? (counter - 1) : 0;
        nodeSize = sizeof(IGNode) + sizeof(short) * extra;

        node = interferencegraph[i] = oalloc(nodeSize);
        memclrw(node, nodeSize);
        node->x10 = i;
        node->x14 = -1;
        node->arraySize = counter;
        node->x12 = counter;

        dest = node->array;
        src = array;
        for (j = 0; j < counter; j++)
            *(dest++) = *(src++);

        // Registers merged by coalescenodes() point at the register they were
        // merged into, which is flagged as a merge target. That register always
        // has a lower number, so its node has already been created above.
        if (i != coalesced[i]) {
            node->flags |= fCoalesced;
            j = coalesced_path(i);
            interferencegraph[j]->flags |= fCoalescedInto;
            node->x14 = j;
        }
    }
}
// Removes instructions that dead() reports as dead for coloring_class.
// Each block is scanned backwards from its live-out set; the live set is
// updated as the scan passes each surviving instruction.
static void eliminatedeadcode(void) {
    UInt32 nregs;
    UInt32 remaining;
    UInt32 *live;
    PCodeBlock *blk;
    PCode *pc;
    PCodeArg *arg;

    nregs = used_virtual_registers[coloring_class];
    live = oalloc(4 * ((nregs + 31) >> 5));

    for (blk = pcbasicblocks; blk; blk = blk->nextBlock) {
        bitvectorcopy(live, liveinfo[blk->blockIndex].out, nregs);

        for (pc = blk->lastPCode; pc; pc = pc->prevPCode) {
            if (dead(pc, coloring_class, live)) {
                deletepcode(pc);
                continue;
            }

            // Written registers are no longer live above this instruction.
            for (arg = pc->args, remaining = pc->argCount; remaining != 0; remaining--, arg++) {
                if (PC_OP_IS_WRITE_ANY_REGISTER(arg, coloring_class))
                    bitvectorclearbit(arg->data.reg.reg, live);
            }

            // Read registers become live; Effect4 marks a read of a register
            // that was not live below this instruction.
            for (arg = pc->args, remaining = pc->argCount; remaining != 0; remaining--, arg++) {
                if (PC_OP_IS_READ_ANY_REGISTER(arg, coloring_class)) {
                    int reg = arg->data.reg.reg;
                    if (!bitvectorgetbit(reg, live))
                        arg->data.reg.effect |= Effect4;
                    bitvectorsetbit(reg, live);
                }
            }
        }
    }
}
// Records, for each virtual register that is written by exactly one
// instruction, that instruction in node->instr8. Registers written more than
// once get fIGNode40 and lose their candidate. Candidates whose instruction is
// not location independent are dropped at the end.
static void findrematerializations(void) {
    UInt32 nregs;
    UInt32 n;
    PCodeBlock *blk;
    PCode *pc;
    PCodeArg *arg;
    IGNode *ig;

    nregs = used_virtual_registers[coloring_class];

    for (blk = pcbasicblocks; blk; blk = blk->nextBlock) {
        for (pc = blk->lastPCode; pc; pc = pc->prevPCode) {
            for (arg = pc->args, n = pc->argCount; n != 0; n--, arg++) {
                if (
                    PC_OP_IS_WRITE_ANY_REGISTER(arg, coloring_class) &&
                    arg->data.reg.reg >= n_real_registers[coloring_class]
                )
                {
                    ig = interferencegraph[arg->data.reg.reg];
                    // Register pairs and already-disqualified registers are skipped.
                    if (!(ig->flags & (fPairLow | fPairHigh | fIGNode40))) {
                        if (ig->instr8) {
                            // Second definition seen: disqualify for good.
                            ig->instr8 = NULL;
                            ig->flags |= fIGNode40;
                        } else {
                            ig->instr8 = pc;
                        }
                    }
                }
            }
        }
    }

    for (n = 0; n < nregs; n++) {
        ig = interferencegraph[n];
        if (ig->instr8 && !is_location_independent(ig->instr8))
            ig->instr8 = NULL;
    }
}
// Runs the full interference-graph construction for coloring_class:
// liveness, dead code removal, matrix construction, optional debug listing,
// coalescing, adjacency vectors and rematerialization candidates.
void buildinterferencegraph(Object *proc) {
    int listedRegs;

    // Captured before any pass runs; only used for the debug listing.
    listedRegs = used_virtual_registers[coloring_class];

    computelivevariables(proc);
    eliminatedeadcode();
    buildinterferencematrix();

    if (copts.debuglisting) {
        pclistinterferences(register_class_format[coloring_class], listedRegs);
    }

    coalescenodes();
    buildadjacencyvectors();
    findrematerializations();
}

View File

@@ -0,0 +1,381 @@
#include "compiler/RegisterInfo.h"
#include "compiler/CodeGen.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/PCode.h"
#include "compiler/CompilerTools.h"
#include "compiler/objects.h"
#include "compiler/types.h"
// Per class: set by set_last_exception_registers() to the highest virtual
// register number allocated at the time of the call.
short last_exception_register[RegClassMax];
// Per class: set by open_fe_temp_registers() to the next unused virtual register.
short first_fe_temporary_register[RegClassMax];
// Not written by any code visible in this part of the file.
short last_argument_register[RegClassMax];
// Both are reset to -1 by init_target_registers().
short _FP_;
short _CALLER_SP_;
// Optional display names for individual registers; filled in by
// init_target_registers(), NULL where a register has no special name.
char *special_register_names[RegClassMax][RegisterMax];
// Snapshot taken by save_before_coloring_nonvolatile_registers() and restored by
// reset_nonvolatile_registers().
static short used_regs_before_coloring;
static UInt8 save_state[RegisterMax];
// NOTE(review): the values look like the PowerPC SPR numbers of XER (1), LR (8),
// CTR (9) and VRSAVE (256), in the same order as the SPR names set up in
// init_target_registers() -- confirm.
short spr_to_sysreg[4] = {1, 8, 9, 0x100};
// Marks a machine register as used (RegState1) if it is a currently free
// (RegState0) non-volatile register of the class. When it is the next
// non-volatile register in allocation order, the used count advances too.
// Registers that are not real, not free, or not in the remaining non-volatile
// list are left untouched.
void asm_used_register(RegClass rclass, short reg) {
    int idx;

    if (reg >= n_real_registers[rclass])
        return;
    if (reg_state[rclass][reg] != RegState0)
        return;

    if (reg == nonvolatile_registers[rclass][used_nonvolatile_registers[rclass]]) {
        // Next register in line: consume it in order.
        if (assignable_registers[rclass] > 0)
            assignable_registers[rclass]--;
        reg_state[rclass][reg] = RegState1;
        used_nonvolatile_registers[rclass]++;
        return;
    }

    // Otherwise search the not-yet-used part of the non-volatile list.
    for (idx = used_nonvolatile_registers[rclass]; idx < n_nonvolatile_registers[rclass]; idx++) {
        if (reg != nonvolatile_registers[rclass][idx])
            continue;
        reg_state[rclass][reg] = RegState1;
        if (assignable_registers[rclass] > 0)
            assignable_registers[rclass]--;
    }
}
// Claims register reg of class rclass. A free register is switched to
// RegState1 and the assignable count drops by one; if it is the next
// non-volatile register in order, the used count advances.
// When obj is non-NULL its VarInfo is updated to record the register.
void retain_register(Object *obj, RegClass rclass, short reg) {
    VarInfo *info;

    CError_ASSERT(95, (short) reg < RegisterMax);

    if (reg_state[rclass][reg] == RegState0) {
        reg_state[rclass][reg] = RegState1;
        assignable_registers[rclass]--;
        if (nonvolatile_registers[rclass][used_nonvolatile_registers[rclass]] == reg)
            used_nonvolatile_registers[rclass]++;
    }

    if (!obj)
        return;

    info = Registers_GetVarInfo(obj);
    info->rclass = rclass;
    info->flags |= VarInfoFlag2;
    info->reg = reg;
}
// Claims two general purpose registers and, when obj is non-NULL, records
// them in its VarInfo as a register pair (reg / regHi).
void retain_GPR_pair(Object *obj, short reg, short regHi) {
    VarInfo *info;

    retain_register(NULL, RegClass_GPR, reg);
    retain_register(NULL, RegClass_GPR, regHi);

    if (!obj)
        return;

    info = Registers_GetVarInfo(obj);
    info->rclass = RegClass_GPR;
    info->flags |= VarInfoFlag2 | VarInfoFlag4;
    info->reg = reg;
    info->regHi = regHi;
}
// Returns 1 when the object's storage class is TK_REGISTER, otherwise 0.
int is_register_object(Object *obj) {
    if (obj->sclass == TK_REGISTER)
        return 1;
    return 0;
}
// Returns the number of the first non-volatile register of the given class,
// or -1 for a class this function does not know.
int GetABIFirstNonVolatile(RegClass rclass) {
    int first = -1;

    switch (rclass) {
        case RegClass_GPR:
            first = 13;
            break;
        case RegClass_FPR:
            first = 14;
            break;
        case RegClass_VR:
            first = 20;
            break;
        case RegClass_CRFIELD:
            first = 2;
            break;
        case RegClass_SPR:
            first = 3;
            break;
        default:
            break;
    }

    return first;
}
// Returns the one-letter prefix for a register class: 'r' for GPR, 'f' for FPR,
// 'v' for VR. Any other class is reported through CError_FATAL and yields '?'.
char GetRegisterClassName(RegClass rclass) {
    char letter;

    switch (rclass) {
        case RegClass_GPR:
            letter = 'r';
            break;
        case RegClass_FPR:
            letter = 'f';
            break;
        case RegClass_VR:
            letter = 'v';
            break;
        default:
            CError_FATAL(242);
            letter = '?';
            break;
    }

    return letter;
}
// File-local alias for GetABIFirstNonVolatile().
static int first_nonvolatile_reg(RegClass rclass) {
    int first;

    first = GetABIFirstNonVolatile(rclass);
    return first;
}
// Fills in the per-class name and printf-style format used when listing
// registers in diagnostics.
void setup_diagnostic_reg_strings(void) {
    // Class names
    register_class_name[RegClass_SPR] = "SPR";
    register_class_name[RegClass_CRFIELD] = "CRFIELD";
    register_class_name[RegClass_VR] = "VR";
    register_class_name[RegClass_FPR] = "FPR";
    register_class_name[RegClass_GPR] = "GPR";

    // Formats for a single register number
    register_class_format[RegClass_SPR] = "spr%" PRId32;
    register_class_format[RegClass_CRFIELD] = "cr%" PRId32;
    register_class_format[RegClass_VR] = "vr%" PRId32;
    register_class_format[RegClass_FPR] = "f%" PRId32;
    register_class_format[RegClass_GPR] = "r%" PRId32;
}
// Sets up the register tables for the target: special register names, the
// number of real registers per class, the reserved registers, and the lists of
// non-volatile and scratch registers for every class.
void init_target_registers(void) {
int reg;
int end;
RegClass rclass;
// Both tables are indexed by RegClass value.
// NOTE(review): the enum order is not visible here; check the entries against it.
static int last_nonvolatile_reg[] = {3, 5, 31, 31, 31};
// Number of non-volatile registers held back from assignable_registers.
static int nonvol_reserve[] = {0, 0, 0, 4, 3};
for (rclass = 0; rclass < RegClassMax; rclass++) {
for (reg = 0; reg < RegisterMax; reg++)
special_register_names[rclass][reg] = NULL;
}
special_register_names[RegClass_SPR][0] = "XER";
special_register_names[RegClass_SPR][1] = "LR";
special_register_names[RegClass_SPR][2] = "CTR";
special_register_names[RegClass_SPR][3] = "VRSAVE";
special_register_names[RegClass_GPR][1] = "SP";
setup_diagnostic_reg_strings();
n_real_registers[RegClass_SPR] = 4;
n_real_registers[RegClass_CRFIELD] = 8;
n_real_registers[RegClass_VR] = 32;
n_real_registers[RegClass_FPR] = 32;
n_real_registers[RegClass_GPR] = 32;
// GPR 1, GPR 2 and CR field 5 are put in RegState2, which keeps them out of the
// lists built below (those only accept RegState0).
// NOTE(review): no other reg_state entry is written here; presumably the table
// is cleared elsewhere before this runs -- confirm.
reg_state[RegClass_GPR][1] = RegState2;
reg_state[RegClass_GPR][2] = RegState2;
reg_state[RegClass_CRFIELD][5] = RegState2;
for (rclass = 0; rclass < RegClassMax; rclass++) {
n_nonvolatile_registers[rclass] = 0;
if (last_nonvolatile_reg[rclass] >= 0) {
end = first_nonvolatile_reg(rclass);
// Non-volatile registers are listed from the highest number downwards.
for (reg = last_nonvolatile_reg[rclass]; reg >= end; reg--) {
if (reg_state[rclass][reg] == RegState0) {
nonvolatile_registers[rclass][n_nonvolatile_registers[rclass]++] = reg;
}
}
}
// Assignable count is the list size minus the reserve, never below zero.
assignable_registers[rclass] = n_nonvolatile_registers[rclass] - nonvol_reserve[rclass];
if (assignable_registers[rclass] < 0)
assignable_registers[rclass] = 0;
n_scratch_registers[rclass] = 0;
// Scratch registers are the free real registers outside the non-volatile range,
// listed in ascending order.
for (reg = 0; reg < n_real_registers[rclass]; reg++) {
if (reg < GetABIFirstNonVolatile(rclass) || reg > last_nonvolatile_reg[rclass]) {
if (reg_state[rclass][reg] == RegState0) {
scratch_registers[rclass][n_scratch_registers[rclass]++] = reg;
}
}
}
}
_FP_ = -1;
_CALLER_SP_ = -1;
optimizing = (copts.optimizationlevel > 0) && !disable_optimizer;
}
// Picks a register class for obj from its type and, if registers of that class
// are still assignable, assigns one (or a GPR pair for 8-byte integers/enums).
// Types that fit no register class leave the VarInfo with rclass == RegClassMax
// and no register.
void assign_register_by_type(Object *obj) {
    Type *type;
    VarInfo *info;
    Boolean needsPair;

    type = obj->type;
    info = Registers_GetVarInfo(obj);
    needsPair = 0;

    info->rclass = RegClassMax;
    info->reg = 0;
    info->regHi = 0;

    if (type->type == TYPEINT || type->type == TYPEENUM) {
        // 8-byte integers and enums occupy two GPRs.
        if (type->size == 8)
            needsPair = 1;
        info->rclass = RegClass_GPR;
    } else if (
        type->type == TYPEPOINTER ||
        (type->type == TYPEMEMBERPOINTER && type->size == 4U)
    )
    {
        // Pointers (arrays excluded) and 4-byte member pointers use one GPR.
        info->rclass = RegClass_GPR;
    } else if (type->type == TYPEFLOAT) {
        info->rclass = RegClass_FPR;
    } else if (
        type->type == TYPESTRUCT &&
        TYPE_STRUCT(type)->stype >= STRUCT_VECTOR_UCHAR &&
        TYPE_STRUCT(type)->stype <= STRUCT_VECTOR_PIXEL
    )
    {
        info->rclass = RegClass_VR;
    } else {
        return;
    }

    if (info->rclass >= RegClassMax)
        return;

    if (needsPair) {
        CError_ASSERT(520, info->rclass == RegClass_GPR);
        if (assignable_registers[info->rclass] > 1)
            assign_GPR_pair(obj);
    } else if (assignable_registers[info->rclass] > 0) {
        assign_register_to_variable(obj, info->rclass);
    }
}
// Gives obj a pair of general purpose registers. When optimizing, two fresh
// virtual registers are taken; otherwise two non-volatile machine registers are
// obtained and retained. Both register numbers must be greater than zero.
void assign_GPR_pair(Object *obj) {
    VarInfo *info;
    short primary;
    short secondary;

    info = Registers_GetVarInfo(obj);

    if (!optimizing) {
        CError_ASSERT(554, assignable_registers[RegClass_GPR] >= 2);
        primary = obtain_nonvolatile_register(RegClass_GPR);
        secondary = obtain_nonvolatile_register(RegClass_GPR);
        retain_GPR_pair(obj, primary, secondary);
    } else {
        primary = used_virtual_registers[RegClass_GPR]++;
        secondary = used_virtual_registers[RegClass_GPR]++;
    }

    info->rclass = RegClass_GPR;

    if (primary <= 0 || secondary <= 0) {
        CError_FATAL(567);
        return;
    }

    info->flags |= VarInfoFlag2 | VarInfoFlag4;
    info->reg = primary;
    info->regHi = secondary;
}
// Starts a new range of temporary registers for the GPR, FPR and VR classes:
// both the first and the last temporary register are set to the next unused
// virtual register of the class.
void open_fe_temp_registers(void) {
    int next;

    next = used_virtual_registers[RegClass_GPR];
    last_temporary_register[RegClass_GPR] = next;
    first_fe_temporary_register[RegClass_GPR] = last_temporary_register[RegClass_GPR];

    next = used_virtual_registers[RegClass_FPR];
    last_temporary_register[RegClass_FPR] = next;
    first_fe_temporary_register[RegClass_FPR] = last_temporary_register[RegClass_FPR];

    next = used_virtual_registers[RegClass_VR];
    last_temporary_register[RegClass_VR] = next;
    first_fe_temporary_register[RegClass_VR] = last_temporary_register[RegClass_VR];
}
// Records, for the GPR, FPR and VR classes, the highest virtual register
// number allocated so far (used count minus one).
void set_last_exception_registers(void) {
    static const int classes[] = { RegClass_GPR, RegClass_FPR, RegClass_VR };
    int k;
    int cls;

    for (k = 0; k < 3; k++) {
        cls = classes[k];
        last_exception_register[cls] = used_virtual_registers[cls] - 1;
    }
}
// Allocates a VarInfo with galloc() and clears it.
static VarInfo *Registers_GetNewVarInfo(void) {
    VarInfo *info;

    info = galloc(sizeof *info);
    memclrw(info, sizeof *info);
    return info;
}
// Returns the VarInfo attached to obj, selected by obj->datatype.
// Data and absolute objects get one created on demand. Locals and non-lazy
// pointers are expected to have one already; a missing one is reported through
// CError_FATAL. Any other datatype is a fatal error and yields NULL.
VarInfo *Registers_GetVarInfo(Object *obj) {
    switch (obj->datatype) {
        case DDATA:
        case DABSOLUTE:
            // DABSOLUTE uses the same union member as DDATA here; the original
            // author was not sure this is the right union for it.
            if (obj->u.data.info == NULL)
                obj->u.data.info = Registers_GetNewVarInfo();
            return obj->u.data.info;

        case DLOCAL:
            if (obj->u.var.info == NULL)
                CError_FATAL(647);
            return obj->u.var.info;

        case DNONLAZYPTR:
            if (obj->u.toc.info == NULL) {
                CError_FATAL(639);
                obj->u.toc.info = CodeGen_GetNewVarInfo();
            }
            return obj->u.toc.info;

        default:
            CError_FATAL(660);
            return NULL;
    }
}
// Counts the vector registers whose reg_state entry is non-zero.
int used_vrstate_VRs(void) {
    int total;
    int reg;

    total = 0;
    reg = 0;
    while (reg < RegisterMax) {
        if (reg_state[RegClass_VR][reg])
            total++;
        reg++;
    }
    return total;
}
// Computes a VRSAVE-style mask of the vector registers referenced by the
// instructions in the given block list. Register n maps to bit (31 - n), so
// register 0 is the most significant bit.
//
// copts.altivec_vrsave == 2 returns all bits set and == 0 returns no bits,
// both without scanning any code.
//
// The shift is done on an unsigned value: shifting a signed 1 left by 31
// (register 0) is undefined behaviour in C.
// NOTE(review): assumes every VR operand number is in 0..31 at this point, as the
// function name suggests -- confirm with callers.
UInt32 colored_vrs_as_vrsave(PCodeBlock *block) {
    PCode *pc;
    UInt32 mask;
    int i;

    mask = 0;

    if (copts.altivec_vrsave == 2)
        return 0xFFFFFFFF;
    if (copts.altivec_vrsave == 0)
        return 0;

    while (block) {
        for (pc = block->firstPCode; pc; pc = pc->nextPCode) {
            if (pc->flags & fOpTypeVR) {
                for (i = 0; i < pc->argCount; i++) {
                    if (pc->args[i].kind == PCOp_REGISTER && pc->args[i].arg == RegClass_VR)
                        mask |= (UInt32) 1 << (31 - pc->args[i].data.reg.reg);
                }
            }
        }
        block = block->nextBlock;
    }

    return mask;
}
// Takes a snapshot of the class's register states and of its used
// non-volatile count, to be restored by reset_nonvolatile_registers().
void save_before_coloring_nonvolatile_registers(RegClass rclass) {
    memcpy(save_state, reg_state[rclass], sizeof(save_state));
    used_regs_before_coloring = used_nonvolatile_registers[rclass];
}
// Restores the register states and used non-volatile count saved by
// save_before_coloring_nonvolatile_registers().
void reset_nonvolatile_registers(RegClass rclass) {
    memcpy(reg_state[rclass], save_state, sizeof(save_state));
    used_nonvolatile_registers[rclass] = used_regs_before_coloring;
}
// Returns 1 when reg appears in the class's non-volatile register list,
// otherwise 0.
int is_nonvolatile_register(RegClass rclass, int reg) {
    int idx;

    idx = 0;
    while (idx < n_nonvolatile_registers[rclass]) {
        if (nonvolatile_registers[rclass][idx] == reg)
            return 1;
        idx++;
    }
    return 0;
}
// Selects which half of a two-word value is the high one and which is the low
// one, both as memory offsets (0 / 4) and as register numbers (3/4 and 5/6),
// depending on copts.littleendian.
void init_endian(void) {
    if (!copts.littleendian) {
        // Big endian: high half first.
        high_offset = 0;
        high_reg = 3;
        high_reg2 = 5;

        low_offset = 4;
        low_reg = 4;
        low_reg2 = 6;
        return;
    }

    // Little endian: low half first.
    low_offset = 0;
    low_reg = 3;
    low_reg2 = 5;

    high_offset = 4;
    high_reg = 4;
    high_reg2 = 6;
}
// For every class, raises used_nonvolatile_registers so that it covers the
// last entry of the non-volatile list that is in RegState1. The count is never
// lowered.
void update_asm_nonvolatile_registers(void) {
    RegClass rclass;
    int needed;

    for (rclass = 0; rclass < RegClassMax; rclass++) {
        // Trim trailing entries that are not in RegState1.
        needed = n_nonvolatile_registers[rclass];
        while (
            needed > 0 &&
            reg_state[rclass][nonvolatile_registers[rclass][needed - 1]] != RegState1
        )
        {
            needed--;
        }

        if (used_nonvolatile_registers[rclass] < needed)
            used_nonvolatile_registers[rclass] = needed;
    }
}

View File

@@ -0,0 +1,452 @@
#include "compiler/SpillCode.h"
#include "compiler/CError.h"
#include "compiler/CMachine.h"
#include "compiler/CParser.h"
#include "compiler/CodeGen.h"
#include "compiler/CompilerTools.h"
#include "compiler/Coloring.h"
#include "compiler/InterferenceGraph.h"
#include "compiler/Operands.h"
#include "compiler/PCode.h"
#include "compiler/PCodeUtilities.h"
#include "compiler/Registers.h"
#include "compiler/RegisterInfo.h"
#include "compiler/StackFrame.h"
#include "compiler/objects.h"
// Value of used_virtual_registers[coloring_class] when assign_spill_locations()
// ran; registers numbered at or above it were created by the spill code itself.
static int last_unused_vreg_before_spilling;
// GPR virtual register used to hold the address for vector spill loads/stores.
// 0 means none has been allocated yet in the current insertspillcode() run.
static short rTEMP_for_VR_spill;
// Accumulates a spill cost into every interference graph node of
// coloring_class. Each use or definition contributes the block's loop weight
// (or 1 when optimizing for size). Reads of a register that has no
// rematerialization candidate count double unless optimizing for size.
// Writes to a register with a candidate, or made by an argument-initializing
// instruction, reduce the cost instead of raising it.
void estimatespillcosts(void) {
    PCodeBlock *blk;
    PCode *pc;
    PCodeArg *arg;
    IGNode *ig;
    int n;
    int w;

    for (blk = pcbasicblocks; blk; blk = blk->nextBlock) {
        w = copts.optimizesize ? 1 : blk->loopWeight;

        for (pc = blk->firstPCode; pc; pc = pc->nextPCode) {
            // Reads
            for (arg = pc->args, n = pc->argCount; n != 0; n--, arg++) {
                if (PC_OP_IS_READ_ANY_REGISTER(arg, coloring_class)) {
                    ig = interferencegraph[arg->data.reg.reg];
                    ig->spillCost += (ig->instr8 || copts.optimizesize) ? w : w * 2;
                }
            }

            // Writes
            for (arg = pc->args, n = pc->argCount; n != 0; n--, arg++) {
                if (PC_OP_IS_WRITE_ANY_REGISTER(arg, coloring_class)) {
                    ig = interferencegraph[arg->data.reg.reg];
                    if (ig->instr8 || (pc->flags & fIsArgInit))
                        ig->spillCost -= w;
                    else
                        ig->spillCost += w;
                }
            }
        }
    }
}
// Creates a fresh local object of the given type, with a unique name and a new
// VarInfo, to serve as the memory slot for a spilled register.
static Object *makespilltemporary(Type *type) {
    Object *tmp;

    tmp = lalloc(sizeof *tmp);
    memclrw(tmp, sizeof *tmp);

    tmp->type = type;
    tmp->otype = OT_OBJECT;
    tmp->access = ACCESSPUBLIC;
    tmp->datatype = DLOCAL;
    tmp->name = CParser_GetUniqueName();
    tmp->u.var.info = CodeGen_GetNewVarInfo();
    tmp->u.var.uid = 0;

    return tmp;
}
// Returns a copy of the node's rematerialization instruction (node->instr8)
// whose first operand, which must be a register, is redirected to reg.
static PCode *rematerialize_spilled_register(short reg, IGNode *node) {
    PCode *clone;

    clone = copypcode(node->instr8);
    CError_ASSERT(128, clone->args[0].kind == PCOp_REGISTER);
    clone->args[0].data.reg.reg = reg;
    return clone;
}
// Inserts, before instr, the code that reloads the node's spill temporary into
// register reg. The instruction sequence depends on coloring_class and on the
// size of the temporary's type.
static void insert_load_spilled_register(PCode *instr, short reg, IGNode *node) {
Type *type;
Opcode opcode;
Object *object;
PCode *newInstr;
PCode *newInstr2;
SInt32 offset;
Operand operand;
type = node->spillTemporary->type;
switch (coloring_class) {
case RegClass_CRFIELD:
case RegClass_GPR:
// Pick the load by size; 2-byte loads also depend on signedness.
switch (type->size) {
case 1:
opcode = PC_LBZ;
break;
case 2:
opcode = is_unsigned(type) ? PC_LHZ : PC_LHA;
break;
case 4:
opcode = PC_LWZ;
break;
// An 8-byte temporary is still loaded one word at a time; which word is
// decided by the pair flags below.
case 8:
opcode = PC_LWZ;
break;
default:
CError_FATAL(187);
}
// Turn the temporary into a base register + immediate offset operand.
memclrw(&operand, sizeof(Operand));
operand.optype = OpndType_Symbol;
operand.object = node->spillTemporary;
CError_ASSERT(222, node->spillTemporary->datatype == DLOCAL);
coerce_to_addressable(&operand);
CError_ASSERT(233, operand.optype == OpndType_GPR_ImmOffset);
CError_ASSERT(237, node->spillTemporary->datatype == DLOCAL);
// Halves of a register pair live at low_offset / high_offset in the slot.
if (node->flags & fPairLow)
offset = low_offset;
else if (node->flags & fPairHigh)
offset = high_offset;
else
offset = 0;
insertpcodebefore(instr, makepcode(opcode, reg, operand.reg, operand.object, operand.immOffset + offset));
break;
case RegClass_FPR:
CError_ASSERT(253, node->spillTemporary->datatype == DLOCAL);
if (node->flags & fPairLow)
offset = low_offset;
else if (node->flags & fPairHigh)
offset = high_offset;
else
offset = 0;
object = node->spillTemporary;
// Double-precision load for 8-byte temporaries, single-precision otherwise.
insertpcodebefore(
instr,
makepcode(
(type->size == 8) ? PC_LFD : PC_LFS,
reg,
local_base_register(object),
object,
offset
)
);
break;
case RegClass_VR:
CError_ASSERT(320, node->spillTemporary->datatype == DLOCAL);
object = node->spillTemporary;
// Two instructions: compute the slot address into rTEMP_for_VR_spill, then
// load the vector through it.
newInstr = makepcode(PC_ADDI, rTEMP_for_VR_spill, local_base_register(object), object, 0);
newInstr2 = makepcode(PC_LVX, reg, 0, rTEMP_for_VR_spill);
insertpcodebefore(instr, newInstr);
insertpcodeafter(newInstr, newInstr2);
break;
default:
CError_FATAL(333);
}
}
// Inserts the code that writes register reg to the node's spill temporary.
// The store goes before instr when flag is non-zero, otherwise after it.
// Vector stores need two instructions: an address computation into
// rTEMP_for_VR_spill followed by the store itself.
static void insert_store_spilled_register(PCode *instr, Boolean flag, short reg, IGNode *node) {
    Object *slot;
    Type *slotType;
    Opcode storeOp;
    SInt32 pairOffset;
    PCode *first;
    PCode *second;

    slot = node->spillTemporary;
    slotType = slot->type;
    second = NULL;

    switch (coloring_class) {
        case RegClass_CRFIELD:
        case RegClass_GPR:
            switch (slotType->size) {
                case 1:
                    storeOp = PC_STB;
                    break;
                case 2:
                    storeOp = PC_STH;
                    break;
                case 4:
                case 8:
                    // 8-byte slots are written one word at a time.
                    storeOp = PC_STW;
                    break;
                default:
                    CError_FATAL(391);
            }

            // Halves of a register pair live at low_offset / high_offset.
            pairOffset = 0;
            if (node->flags & fPairLow)
                pairOffset = low_offset;
            else if (node->flags & fPairHigh)
                pairOffset = high_offset;

            first = makepcode(storeOp, reg, local_base_register(slot), slot, pairOffset);
            break;

        case RegClass_FPR:
            first = makepcode((slotType->size == 8) ? PC_STFD : PC_STFS, reg, local_base_register(slot), slot, 0);
            break;

        case RegClass_VR:
            first = makepcode(PC_ADDI, rTEMP_for_VR_spill, local_base_register(slot), slot, 0);
            second = makepcode(PC_STVX, reg, 0, rTEMP_for_VR_spill);
            break;

        default:
            CError_FATAL(527);
            return;
    }

    if (flag)
        insertpcodebefore(instr, first);
    else
        insertpcodeafter(instr, first);

    if (second)
        insertpcodeafter(first, second);
}
// Rewrites one instruction so that every spilled register it uses is replaced
// by a fresh virtual register, adding a reload (or rematerialization) before it
// and a store after it as needed.
// The block parameter is not used by the code in this function.
static void spillinstruction(PCodeBlock *block, PCode *instr) {
int reg;
int reg2;
int regs;
IGNode *node;
PCodeArg *op;
int i;
PCodeArg *op2;
int j;
int readCounter;
int writeCounter;
Boolean flag;
// Captured once: registers created below are numbered >= regs and therefore
// fail the `reg < regs` test, so they are never treated as spilled themselves.
regs = used_virtual_registers[coloring_class];
flag = 0;
for (i = 0, op = instr->args; i < instr->argCount; i++, op++) {
CError_ASSERT(563, instr->block != NULL);
// Note: reg and node are assigned inside the condition.
if (
PC_OP_IS_ANY_REGISTER(op, coloring_class) &&
(reg = op->data.reg.reg) < regs &&
((node = interferencegraph[op->data.reg.reg])->flags & fSpilled)
)
{
reg2 = used_virtual_registers[coloring_class]++;
readCounter = 0;
writeCounter = 0;
// Redirect this operand and every later operand naming the same register to
// reg2, counting how many of them read and how many write.
for (j = i, op2 = op; j < instr->argCount; j++, op2++) {
if (PC_OP_IS_REGISTER(op2, coloring_class, reg)) {
if (op2->data.reg.effect & EffectRead)
readCounter++;
if (op2->data.reg.effect & EffectWrite)
writeCounter++;
op2->data.reg.reg = reg2;
op2->data.reg.effect |= Effect40;
}
}
// A read needs the value first: recompute it if the node has a
// rematerialization instruction, otherwise load it from the spill slot.
if (readCounter) {
if (node->instr8)
insertpcodebefore(instr, rematerialize_spilled_register(reg2, node));
else
insert_load_spilled_register(instr, reg2, node);
}
// A write is stored back after the instruction, unless the node has a
// rematerialization instruction or this instruction is an argument
// initializer; in those cases the whole instruction is dropped at the end.
if (writeCounter) {
if (node->instr8 || (instr->flags & fIsArgInit))
flag = 1;
else
insert_store_spilled_register(instr, 0, reg2, node);
}
}
}
if (flag)
deletepcode(instr);
}
// Replaces a register move whose source (args[1]) and/or destination (args[0])
// is spilled:
//  - source not spilled: store the source register to the destination's slot;
//  - source spilled, destination not: bring the value straight into the
//    destination register;
//  - both spilled: go through a fresh virtual register.
// The move itself is always deleted. The block parameter is unused here.
static void spillcopy(PCodeBlock *block, PCode *instr) {
    IGNode *src;
    IGNode *dst;
    int target;
    int dstSpilled;

    src = interferencegraph[instr->args[1].data.reg.reg];
    dst = interferencegraph[instr->args[0].data.reg.reg];

    if (!(src->flags & fSpilled)) {
        insert_store_spilled_register(instr, 1, instr->args[1].data.reg.reg, dst);
    } else {
        dstSpilled = (dst->flags & fSpilled) != 0;

        if (dstSpilled)
            target = used_virtual_registers[coloring_class]++;
        else
            target = instr->args[0].data.reg.reg;

        if (src->instr8)
            insertpcodebefore(instr, rematerialize_spilled_register(target, src));
        else
            insert_load_spilled_register(instr, target, src);

        if (dstSpilled)
            insert_store_spilled_register(instr, 1, target, dst);
    }

    deletepcode(instr);
}
// Handles a call instruction that touches spilled registers. Operands after
// the first branch_count_volatiles() entries that name a spilled virtual
// register are removed from the operand list (the rest are compacted in place
// and argCount is reduced); the instruction is then processed like any other.
static void spillcall(PCodeBlock *block, PCode *instr) {
    PCodeArg *src;
    int total;
    int firstArg;
    int rd;
    int wr;

    total = instr->argCount;
    firstArg = branch_count_volatiles();

    wr = firstArg;
    for (rd = firstArg; rd < total; rd++) {
        src = &instr->args[rd];
        if (
            PC_OP_IS_ANY_REGISTER(src, coloring_class) &&
            src->data.reg.reg >= n_real_registers[coloring_class] &&
            (interferencegraph[src->data.reg.reg]->flags & fSpilled)
        )
        {
            // Drop this operand.
            instr->argCount--;
        } else {
            instr->args[wr] = *src;
            wr++;
        }
    }

    spillinstruction(block, instr);
}
static void assign_spill_locations(void) {
UInt32 i;
IGNode *node;
Type *type;
last_unused_vreg_before_spilling = used_virtual_registers[coloring_class];
for (i = n_real_registers[coloring_class]; i < last_unused_vreg_before_spilling; i++) {
node = interferencegraph[i];
if (node->flags & fCoalesced)
continue;
if (!(node->flags & fSpilled))
continue;
if (!node->spillTemporary) {
switch (coloring_class) {
case RegClass_GPR:
type = TYPE(&stunsignedlong);
break;
case RegClass_CRFIELD:
type = TYPE(&stunsignedlong);
break;
case RegClass_FPR:
type = TYPE(&stunsignedlong);
break;
case RegClass_VR:
type = TYPE(&stvectorunsignedchar);
break;
default:
CError_FATAL(771);
}
node->spillTemporary = makespilltemporary(type);
}
if (node->spillTemporary->datatype == DLOCAL && !(node->spillTemporary->u.var.info->flags & VarInfoFlag1))
assign_local_memory(node->spillTemporary);
if (node->flags & fPairHigh)
Registers_GetVarInfo(node->spillTemporary)->regHi = Register0;
else
Registers_GetVarInfo(node->spillTemporary)->reg = Register0;
}
}
// Inserts spill code for coloring_class: assigns memory slots, then rewrites
// every instruction that references a spilled register. Moves, calls and all
// other instructions each have their own handler.
void insertspillcode(void) {
    PCodeBlock *blk;
    PCode *pc;
    PCode *following;
    PCodeArg *arg;
    UInt32 n;
    int touchesSpilled;

    rTEMP_for_VR_spill = 0;
    assign_spill_locations();

    for (blk = pcbasicblocks; blk; blk = blk->nextBlock) {
        pc = blk->firstPCode;
        while (pc) {
            // The handlers insert and delete instructions around pc, so the
            // successor is fetched up front.
            following = pc->nextPCode;

            // Does any operand name a register that existed before spilling
            // started and is marked as spilled?
            touchesSpilled = 0;
            for (arg = pc->args, n = pc->argCount; n != 0 && !touchesSpilled; n--, arg++) {
                if (
                    PC_OP_IS_ANY_REGISTER(arg, coloring_class) &&
                    arg->data.reg.reg < last_unused_vreg_before_spilling &&
                    (interferencegraph[arg->data.reg.reg]->flags & fSpilled)
                )
                {
                    touchesSpilled = 1;
                }
            }

            if (touchesSpilled) {
                // Vector spills need a GPR for the slot address; allocate it once.
                if (coloring_class == RegClass_VR && rTEMP_for_VR_spill == 0)
                    rTEMP_for_VR_spill = used_virtual_registers[RegClass_GPR]++;

                if (pc->flags & fIsMove)
                    spillcopy(blk, pc);
                else if (pc->flags & fIsCall)
                    spillcall(blk, pc);
                else
                    spillinstruction(blk, pc);
            }

            pc = following;
        }
    }
}

View File

@@ -0,0 +1,552 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://stuff.mit.edu/afs/sipb/contrib/doc/specs/ic/cpu/powerpc/mpc601.pdf
// https://www.nxp.com/docs/en/user-guide/MPC601UMAD.pdf
// Pipeline stages modelled by this scheduler.
// The entries before NumStages are real stages and index the pipeline[] array;
// the entries after it are markers that only appear in the timing table.
typedef enum Stage {
IU, // Integer Unit
FD, // FP Decode
FPM, // FP Multiply
FPA, // FP Add
FWA, // FP Arithmetic Writeback
BPU, // Branch Processing Unit
NumStages, // count of real stages above
Serialize, // special form for instructions that use IU but are serialised
Unsupported // instructions not supported by this processor
} Stage;
// Current occupancy of each real pipeline stage, indexed by Stage
// (only the values below NumStages).
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Per-opcode timing table, looked up as instruction_timing[instr->op].
// The initializer is written without inner braces, so each row supplies six
// values in member order:
//   stage, latency, cycles[0], cycles[1], cycles[2], cycles[3]
// issue() loads cycles[0] for the stage an instruction starts in; advance()
// is called with a base of 1, so cycles[1..3] apply to FPM, FPA and FWA.
// The trailing comment on each row names the opcode the row is written for.
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[4];
} instruction_timing[OPCODE_MAX] = {
BPU, 0, 0, 0, 0, 1, // PC_B
BPU, 0, 0, 0, 0, 1, // PC_BL
BPU, 0, 0, 0, 0, 1, // PC_BC
BPU, 0, 0, 0, 0, 1, // PC_BCLR
BPU, 0, 0, 0, 0, 1, // PC_BCCTR
BPU, 0, 0, 0, 0, 1, // PC_BT
BPU, 0, 0, 0, 0, 1, // PC_BTLR
BPU, 0, 0, 0, 0, 1, // PC_BTCTR
BPU, 0, 0, 0, 0, 1, // PC_BF
BPU, 0, 0, 0, 0, 1, // PC_BFLR
BPU, 0, 0, 0, 0, 1, // PC_BFCTR
BPU, 0, 0, 0, 0, 1, // PC_BDNZ
BPU, 0, 0, 0, 0, 1, // PC_BDNZT
BPU, 0, 0, 0, 0, 1, // PC_BDNZF
BPU, 0, 0, 0, 0, 1, // PC_BDZ
BPU, 0, 0, 0, 0, 1, // PC_BDZT
BPU, 0, 0, 0, 0, 1, // PC_BDZF
BPU, 0, 0, 0, 0, 1, // PC_BLR
BPU, 0, 0, 0, 0, 1, // PC_BCTR
BPU, 0, 0, 0, 0, 1, // PC_BCTRL
BPU, 0, 0, 0, 0, 1, // PC_BLRL
IU, 2, 1, 0, 0, 0, // PC_LBZ
IU, 2, 1, 0, 0, 0, // PC_LBZU
IU, 2, 1, 0, 0, 0, // PC_LBZX
IU, 2, 1, 0, 0, 0, // PC_LBZUX
IU, 2, 1, 0, 0, 0, // PC_LHZ
IU, 2, 1, 0, 0, 0, // PC_LHZU
IU, 2, 1, 0, 0, 0, // PC_LHZX
IU, 2, 1, 0, 0, 0, // PC_LHZUX
IU, 2, 1, 0, 0, 0, // PC_LHA
IU, 2, 1, 0, 0, 0, // PC_LHAU
IU, 2, 1, 0, 0, 0, // PC_LHAX
IU, 2, 1, 0, 0, 0, // PC_LHAUX
IU, 2, 1, 0, 0, 0, // PC_LHBRX
IU, 2, 1, 0, 0, 0, // PC_LWZ
IU, 2, 1, 0, 0, 0, // PC_LWZU
IU, 2, 1, 0, 0, 0, // PC_LWZX
IU, 2, 1, 0, 0, 0, // PC_LWZUX
IU, 2, 1, 0, 0, 0, // PC_LWBRX
IU, 1, 1, 0, 0, 0, // PC_LMW
IU, 1, 1, 0, 0, 0, // PC_STB
IU, 1, 1, 0, 0, 0, // PC_STBU
IU, 1, 1, 0, 0, 0, // PC_STBX
IU, 1, 1, 0, 0, 0, // PC_STBUX
IU, 1, 1, 0, 0, 0, // PC_STH
IU, 1, 1, 0, 0, 0, // PC_STHU
IU, 1, 1, 0, 0, 0, // PC_STHX
IU, 1, 1, 0, 0, 0, // PC_STHUX
IU, 1, 1, 0, 0, 0, // PC_STHBRX
IU, 1, 1, 0, 0, 0, // PC_STW
IU, 1, 1, 0, 0, 0, // PC_STWU
IU, 1, 1, 0, 0, 0, // PC_STWX
IU, 1, 1, 0, 0, 0, // PC_STWUX
IU, 1, 1, 0, 0, 0, // PC_STWBRX
IU, 1, 1, 0, 0, 0, // PC_STMW
IU, 2, 1, 0, 0, 0, // PC_DCBF
IU, 2, 1, 0, 0, 0, // PC_DCBST
IU, 2, 1, 0, 0, 0, // PC_DCBT
IU, 2, 1, 0, 0, 0, // PC_DCBTST
IU, 2, 1, 0, 0, 0, // PC_DCBZ
IU, 1, 1, 0, 0, 0, // PC_ADD
IU, 1, 1, 0, 0, 0, // PC_ADDC
IU, 1, 1, 0, 0, 0, // PC_ADDE
IU, 1, 1, 0, 0, 0, // PC_ADDI
IU, 1, 1, 0, 0, 0, // PC_ADDIC
IU, 1, 1, 0, 0, 0, // PC_ADDICR
IU, 1, 1, 0, 0, 0, // PC_ADDIS
IU, 1, 1, 0, 0, 0, // PC_ADDME
IU, 1, 1, 0, 0, 0, // PC_ADDZE
IU, 36, 36, 0, 0, 0, // PC_DIVW
IU, 36, 36, 0, 0, 0, // PC_DIVWU
IU, 5, 5, 0, 0, 0, // PC_MULHW
IU, 5, 5, 0, 0, 0, // PC_MULHWU
IU, 5, 5, 0, 0, 0, // PC_MULLI
IU, 5, 5, 0, 0, 0, // PC_MULLW
IU, 1, 1, 0, 0, 0, // PC_NEG
IU, 1, 1, 0, 0, 0, // PC_SUBF
IU, 1, 1, 0, 0, 0, // PC_SUBFC
IU, 1, 1, 0, 0, 0, // PC_SUBFE
IU, 1, 1, 0, 0, 0, // PC_SUBFIC
IU, 1, 1, 0, 0, 0, // PC_SUBFME
IU, 1, 1, 0, 0, 0, // PC_SUBFZE
IU, 3, 1, 0, 0, 0, // PC_CMPI
IU, 3, 1, 0, 0, 0, // PC_CMP
IU, 3, 1, 0, 0, 0, // PC_CMPLI
IU, 3, 1, 0, 0, 0, // PC_CMPL
IU, 1, 1, 0, 0, 0, // PC_ANDI
IU, 1, 1, 0, 0, 0, // PC_ANDIS
IU, 1, 1, 0, 0, 0, // PC_ORI
IU, 1, 1, 0, 0, 0, // PC_ORIS
IU, 1, 1, 0, 0, 0, // PC_XORI
IU, 1, 1, 0, 0, 0, // PC_XORIS
IU, 1, 1, 0, 0, 0, // PC_AND
IU, 1, 1, 0, 0, 0, // PC_OR
IU, 1, 1, 0, 0, 0, // PC_XOR
IU, 1, 1, 0, 0, 0, // PC_NAND
IU, 1, 1, 0, 0, 0, // PC_NOR
IU, 1, 1, 0, 0, 0, // PC_EQV
IU, 1, 1, 0, 0, 0, // PC_ANDC
IU, 1, 1, 0, 0, 0, // PC_ORC
IU, 1, 1, 0, 0, 0, // PC_EXTSB
IU, 1, 1, 0, 0, 0, // PC_EXTSH
IU, 1, 1, 0, 0, 0, // PC_CNTLZW
IU, 1, 1, 0, 0, 0, // PC_RLWINM
IU, 1, 1, 0, 0, 0, // PC_RLWNM
IU, 1, 1, 0, 0, 0, // PC_RLWIMI
IU, 1, 1, 0, 0, 0, // PC_SLW
IU, 1, 1, 0, 0, 0, // PC_SRW
IU, 1, 1, 0, 0, 0, // PC_SRAWI
IU, 1, 1, 0, 0, 0, // PC_SRAW
IU, 1, 1, 0, 0, 0, // PC_CRAND
IU, 1, 1, 0, 0, 0, // PC_CRANDC
IU, 1, 1, 0, 0, 0, // PC_CREQV
IU, 1, 1, 0, 0, 0, // PC_CRNAND
IU, 1, 1, 0, 0, 0, // PC_CRNOR
IU, 1, 1, 0, 0, 0, // PC_CROR
IU, 1, 1, 0, 0, 0, // PC_CRORC
IU, 1, 1, 0, 0, 0, // PC_CRXOR
IU, 1, 1, 0, 0, 0, // PC_MCRF
IU, 4, 1, 0, 0, 0, // PC_MTXER
IU, 4, 1, 0, 0, 0, // PC_MTCTR
IU, 4, 1, 0, 0, 0, // PC_MTLR
IU, 2, 1, 0, 0, 0, // PC_MTCRF
IU, 1, 0, 0, 0, 0, // PC_MTMSR
IU, 1, 0, 0, 0, 0, // PC_MTSPR
IU, 1, 0, 0, 0, 0, // PC_MFMSR
IU, 1, 0, 0, 0, 0, // PC_MFSPR
IU, 1, 1, 0, 0, 0, // PC_MFXER
IU, 1, 1, 0, 0, 0, // PC_MFCTR
IU, 1, 1, 0, 0, 0, // PC_MFLR
IU, 1, 1, 0, 0, 0, // PC_MFCR
FD, 4, 1, 1, 1, 1, // PC_MFFS
FD, 4, 1, 1, 1, 1, // PC_MTFSF
Serialize, 1, 1, 0, 0, 1, // PC_EIEIO
Serialize, 1, 1, 0, 0, 1, // PC_ISYNC
Serialize, 1, 1, 0, 0, 1, // PC_SYNC
Serialize, 0, 0, 0, 0, 1, // PC_RFI
IU, 1, 1, 0, 0, 0, // PC_LI
IU, 1, 1, 0, 0, 0, // PC_LIS
IU, 1, 1, 0, 0, 0, // PC_MR
IU, 1, 1, 0, 0, 0, // PC_NOP
IU, 1, 1, 0, 0, 0, // PC_NOT
IU, 3, 1, 0, 0, 0, // PC_LFS
IU, 3, 1, 0, 0, 0, // PC_LFSU
IU, 3, 1, 0, 0, 0, // PC_LFSX
IU, 3, 1, 0, 0, 0, // PC_LFSUX
IU, 3, 1, 0, 0, 0, // PC_LFD
IU, 3, 1, 0, 0, 0, // PC_LFDU
IU, 3, 1, 0, 0, 0, // PC_LFDX
IU, 3, 1, 0, 0, 0, // PC_LFDUX
IU, 1, 1, 0, 0, 0, // PC_STFS
IU, 1, 1, 0, 0, 0, // PC_STFSU
IU, 1, 1, 0, 0, 0, // PC_STFSX
IU, 1, 1, 0, 0, 0, // PC_STFSUX
IU, 1, 1, 0, 0, 0, // PC_STFD
IU, 1, 1, 0, 0, 0, // PC_STFDU
IU, 1, 1, 0, 0, 0, // PC_STFDX
IU, 1, 1, 0, 0, 0, // PC_STFDUX
FD, 4, 1, 1, 1, 1, // PC_FMR
FD, 4, 1, 1, 1, 1, // PC_FABS
FD, 4, 1, 1, 1, 1, // PC_FNEG
FD, 4, 1, 1, 1, 1, // PC_FNABS
FD, 4, 1, 1, 1, 1, // PC_FADD
FD, 4, 1, 1, 1, 1, // PC_FADDS
FD, 4, 1, 1, 1, 1, // PC_FSUB
FD, 4, 1, 1, 1, 1, // PC_FSUBS
FD, 5, 1, 1, 2, 1, // PC_FMUL
FD, 4, 1, 1, 1, 1, // PC_FMULS
FD, 31, 1, 1, 28, 1, // PC_FDIV
FD, 17, 1, 1, 14, 1, // PC_FDIVS
FD, 5, 1, 1, 2, 1, // PC_FMADD
FD, 4, 1, 1, 1, 1, // PC_FMADDS
FD, 5, 1, 1, 2, 1, // PC_FMSUB
FD, 4, 1, 1, 1, 1, // PC_FMSUBS
FD, 5, 1, 1, 2, 1, // PC_FNMADD
FD, 4, 1, 1, 1, 1, // PC_FNMADDS
FD, 5, 1, 1, 2, 1, // PC_FNMSUB
FD, 4, 1, 1, 1, 1, // PC_FNMSUBS
FD, 4, 1, 1, 1, 1, // PC_FRES
FD, 4, 1, 1, 1, 1, // PC_FRSQRTE
FD, 4, 1, 1, 1, 1, // PC_FSEL
FD, 4, 1, 1, 1, 1, // PC_FRSP
FD, 4, 1, 1, 1, 1, // PC_FCTIW
FD, 4, 1, 1, 1, 1, // PC_FCTIWZ
FD, 6, 1, 1, 1, 1, // PC_FCMPU
FD, 6, 1, 1, 1, 1, // PC_FCMPO
IU, 0, 0, 0, 0, 0, // PC_LWARX
IU, 0, 0, 0, 0, 0, // PC_LSWI
IU, 0, 0, 0, 0, 0, // PC_LSWX
IU, 0, 0, 0, 0, 0, // PC_STFIWX
IU, 0, 0, 0, 0, 0, // PC_STSWI
IU, 0, 0, 0, 0, 0, // PC_STSWX
IU, 0, 0, 0, 0, 0, // PC_STWCX
IU, 0, 0, 0, 0, 0, // PC_ECIWX
IU, 0, 0, 0, 0, 0, // PC_ECOWX
IU, 0, 0, 0, 0, 0, // PC_DCBI
IU, 0, 0, 0, 0, 0, // PC_ICBI
IU, 0, 0, 0, 0, 0, // PC_MCRFS
IU, 0, 0, 0, 0, 0, // PC_MCRXR
IU, 0, 0, 0, 0, 0, // PC_MFTB
IU, 0, 0, 0, 0, 0, // PC_MFSR
IU, 0, 0, 0, 0, 0, // PC_MTSR
IU, 0, 0, 0, 0, 0, // PC_MFSRIN
IU, 0, 0, 0, 0, 0, // PC_MTSRIN
IU, 0, 0, 0, 0, 0, // PC_MTFSB0
IU, 0, 0, 0, 0, 0, // PC_MTFSB1
IU, 0, 0, 0, 0, 0, // PC_MTFSFI
Serialize, 0, 0, 0, 0, 0, // PC_SC
IU, 0, 0, 0, 0, 0, // PC_FSQRT
IU, 0, 0, 0, 0, 0, // PC_FSQRTS
IU, 0, 0, 0, 0, 0, // PC_TLBIA
IU, 0, 0, 0, 0, 0, // PC_TLBIE
IU, 0, 0, 0, 0, 0, // PC_TLBLD
IU, 0, 0, 0, 0, 0, // PC_TLBLI
IU, 0, 0, 0, 0, 0, // PC_TLBSYNC
Serialize, 0, 0, 0, 0, 0, // PC_TW
Serialize, 0, 0, 0, 0, 0, // PC_TRAP
Serialize, 0, 0, 0, 0, 0, // PC_TWI
Serialize, 0, 0, 0, 0, 0, // PC_OPWORD
IU, 0, 0, 0, 0, 0, // PC_MFROM
IU, 0, 0, 0, 0, 0, // PC_DSA
IU, 0, 0, 0, 0, 0, // PC_ESA
IU, 0, 0, 0, 0, 0, // PC_DCCCI
IU, 0, 0, 0, 0, 0, // PC_DCREAD
IU, 0, 0, 0, 0, 0, // PC_ICBT
IU, 0, 0, 0, 0, 0, // PC_ICCCI
IU, 0, 0, 0, 0, 0, // PC_ICREAD
IU, 0, 0, 0, 0, 0, // PC_RFCI
IU, 0, 0, 0, 0, 0, // PC_TLBRE
IU, 0, 0, 0, 0, 0, // PC_TLBSX
IU, 0, 0, 0, 0, 0, // PC_TLBWE
IU, 0, 0, 0, 0, 0, // PC_WRTEE
IU, 0, 0, 0, 0, 0, // PC_WRTEEI
IU, 0, 0, 0, 0, 0, // PC_MFDCR
IU, 0, 0, 0, 0, 0, // PC_MTDCR
Unsupported, 0, 0, 0, 0, 0, // PC_DCBA
Unsupported, 0, 0, 0, 0, 0, // PC_DSS
Unsupported, 0, 0, 0, 0, 0, // PC_DSSALL
Unsupported, 0, 0, 0, 0, 0, // PC_DST
Unsupported, 0, 0, 0, 0, 0, // PC_DSTT
Unsupported, 0, 0, 0, 0, 0, // PC_DSTST
Unsupported, 0, 0, 0, 0, 0, // PC_DSTSTT
Unsupported, 0, 0, 0, 0, 0, // PC_LVEBX
Unsupported, 0, 0, 0, 0, 0, // PC_LVEHX
Unsupported, 0, 0, 0, 0, 0, // PC_LVEWX
Unsupported, 0, 0, 0, 0, 0, // PC_LVSL
Unsupported, 0, 0, 0, 0, 0, // PC_LVSR
Unsupported, 0, 0, 0, 0, 0, // PC_LVX
Unsupported, 0, 0, 0, 0, 0, // PC_LVXL
Unsupported, 0, 0, 0, 0, 0, // PC_STVEBX
Unsupported, 0, 0, 0, 0, 0, // PC_STVEHX
Unsupported, 0, 0, 0, 0, 0, // PC_STVEWX
Unsupported, 0, 0, 0, 0, 0, // PC_STVX
Unsupported, 0, 0, 0, 0, 0, // PC_STVXL
Unsupported, 0, 0, 0, 0, 0, // PC_MFVSCR
Unsupported, 0, 0, 0, 0, 0, // PC_MTVSCR
Unsupported, 0, 0, 0, 0, 0, // PC_VADDCUW
Unsupported, 0, 0, 0, 0, 0, // PC_VADDFP
Unsupported, 0, 0, 0, 0, 0, // PC_VADDSBS
Unsupported, 0, 0, 0, 0, 0, // PC_VADDSHS
Unsupported, 0, 0, 0, 0, 0, // PC_VADDSWS
Unsupported, 0, 0, 0, 0, 0, // PC_VADDUBM
Unsupported, 0, 0, 0, 0, 0, // PC_VADDUBS
Unsupported, 0, 0, 0, 0, 0, // PC_VADDUHM
Unsupported, 0, 0, 0, 0, 0, // PC_VADDUHS
Unsupported, 0, 0, 0, 0, 0, // PC_VADDUWM
Unsupported, 0, 0, 0, 0, 0, // PC_VADDUWS
Unsupported, 0, 0, 0, 0, 0, // PC_VAND
Unsupported, 0, 0, 0, 0, 0, // PC_VANDC
Unsupported, 0, 0, 0, 0, 0, // PC_VAVGSB
Unsupported, 0, 0, 0, 0, 0, // PC_VAVGSH
Unsupported, 0, 0, 0, 0, 0, // PC_VAVGSW
Unsupported, 0, 0, 0, 0, 0, // PC_VAVGUB
Unsupported, 0, 0, 0, 0, 0, // PC_VAVGUH
Unsupported, 0, 0, 0, 0, 0, // PC_VAVGUW
Unsupported, 0, 0, 0, 0, 0, // PC_VCFSX
Unsupported, 0, 0, 0, 0, 0, // PC_VCFUX
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPBFP
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQFP
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQUB
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQUH
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPEQUW
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGEFP
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTFP
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTSB
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTSH
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTSW
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTUB
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTUH
Unsupported, 0, 0, 0, 0, 0, // PC_VCMPGTUW
Unsupported, 0, 0, 0, 0, 0, // PC_VCTSXS
Unsupported, 0, 0, 0, 0, 0, // PC_VCTUXS
Unsupported, 0, 0, 0, 0, 0, // PC_VEXPTEFP
Unsupported, 0, 0, 0, 0, 0, // PC_VLOGEFP
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXFP
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXSB
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXSH
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXSW
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXUB
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXUH
Unsupported, 0, 0, 0, 0, 0, // PC_VMAXUW
Unsupported, 0, 0, 0, 0, 0, // PC_VMINFP
Unsupported, 0, 0, 0, 0, 0, // PC_VMINSB
Unsupported, 0, 0, 0, 0, 0, // PC_VMINSH
Unsupported, 0, 0, 0, 0, 0, // PC_VMINSW
Unsupported, 0, 0, 0, 0, 0, // PC_VMINUB
Unsupported, 0, 0, 0, 0, 0, // PC_VMINUH
Unsupported, 0, 0, 0, 0, 0, // PC_VMINUW
Unsupported, 0, 0, 0, 0, 0, // PC_VMRGHB
Unsupported, 0, 0, 0, 0, 0, // PC_VMRGHH
Unsupported, 0, 0, 0, 0, 0, // PC_VMRGHW
Unsupported, 0, 0, 0, 0, 0, // PC_VMRGLB
Unsupported, 0, 0, 0, 0, 0, // PC_VMRGLH
Unsupported, 0, 0, 0, 0, 0, // PC_VMRGLW
Unsupported, 0, 0, 0, 0, 0, // PC_VMULESB
Unsupported, 0, 0, 0, 0, 0, // PC_VMULESH
Unsupported, 0, 0, 0, 0, 0, // PC_VMULEUB
Unsupported, 0, 0, 0, 0, 0, // PC_VMULEUH
Unsupported, 0, 0, 0, 0, 0, // PC_VMULOSB
Unsupported, 0, 0, 0, 0, 0, // PC_VMULOSH
Unsupported, 0, 0, 0, 0, 0, // PC_VMULOUB
Unsupported, 0, 0, 0, 0, 0, // PC_VMULOUH
Unsupported, 0, 0, 0, 0, 0, // PC_VNOR
Unsupported, 0, 0, 0, 0, 0, // PC_VOR
Unsupported, 0, 0, 0, 0, 0, // PC_VPKPX
Unsupported, 0, 0, 0, 0, 0, // PC_VPKSHSS
Unsupported, 0, 0, 0, 0, 0, // PC_VPKSHUS
Unsupported, 0, 0, 0, 0, 0, // PC_VPKSWSS
Unsupported, 0, 0, 0, 0, 0, // PC_VPKSWUS
Unsupported, 0, 0, 0, 0, 0, // PC_VPKUHUM
Unsupported, 0, 0, 0, 0, 0, // PC_VPKUHUS
Unsupported, 0, 0, 0, 0, 0, // PC_VPKUWUM
Unsupported, 0, 0, 0, 0, 0, // PC_VPKUWUS
Unsupported, 0, 0, 0, 0, 0, // PC_VREFP
Unsupported, 0, 0, 0, 0, 0, // PC_VRFIM
Unsupported, 0, 0, 0, 0, 0, // PC_VRFIN
Unsupported, 0, 0, 0, 0, 0, // PC_VRFIP
Unsupported, 0, 0, 0, 0, 0, // PC_VRFIZ
Unsupported, 0, 0, 0, 0, 0, // PC_VRLB
Unsupported, 0, 0, 0, 0, 0, // PC_VRLH
Unsupported, 0, 0, 0, 0, 0, // PC_VRLW
Unsupported, 0, 0, 0, 0, 0, // PC_VRSQRTEFP
Unsupported, 0, 0, 0, 0, 0, // PC_VSL
Unsupported, 0, 0, 0, 0, 0, // PC_VSLB
Unsupported, 0, 0, 0, 0, 0, // PC_VSLH
Unsupported, 0, 0, 0, 0, 0, // PC_VSLO
Unsupported, 0, 0, 0, 0, 0, // PC_VSLW
Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTB
Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTH
Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTW
Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTISB
Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTISH
Unsupported, 0, 0, 0, 0, 0, // PC_VSPLTISW
Unsupported, 0, 0, 0, 0, 0, // PC_VSR
Unsupported, 0, 0, 0, 0, 0, // PC_VSRAB
Unsupported, 0, 0, 0, 0, 0, // PC_VSRAH
Unsupported, 0, 0, 0, 0, 0, // PC_VSRAW
Unsupported, 0, 0, 0, 0, 0, // PC_VSRB
Unsupported, 0, 0, 0, 0, 0, // PC_VSRH
Unsupported, 0, 0, 0, 0, 0, // PC_VSRO
Unsupported, 0, 0, 0, 0, 0, // PC_VSRW
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBCUW
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBFP
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBSBS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBSHS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBSWS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUBM
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUBS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUHM
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUHS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUWM
Unsupported, 0, 0, 0, 0, 0, // PC_VSUBUWS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUMSWS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUM2SWS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUM4SBS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUM4SHS
Unsupported, 0, 0, 0, 0, 0, // PC_VSUM4UBS
Unsupported, 0, 0, 0, 0, 0, // PC_VUPKHPX
Unsupported, 0, 0, 0, 0, 0, // PC_VUPKHSB
Unsupported, 0, 0, 0, 0, 0, // PC_VUPKHSH
Unsupported, 0, 0, 0, 0, 0, // PC_VUPKLPX
Unsupported, 0, 0, 0, 0, 0, // PC_VUPKLSB
Unsupported, 0, 0, 0, 0, 0, // PC_VUPKLSH
Unsupported, 0, 0, 0, 0, 0, // PC_VXOR
Unsupported, 0, 0, 0, 0, 0, // PC_VMADDFP
Unsupported, 0, 0, 0, 0, 0, // PC_VMHADDSHS
Unsupported, 0, 0, 0, 0, 0, // PC_VMHRADDSHS
Unsupported, 0, 0, 0, 0, 0, // PC_VMLADDUHM
Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMMBM
Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMSHM
Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMSHS
Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMUBM
Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMUHM
Unsupported, 0, 0, 0, 0, 0, // PC_VMSUMUHS
Unsupported, 0, 0, 0, 0, 0, // PC_VNMSUBFP
Unsupported, 0, 0, 0, 0, 0, // PC_VPERM
Unsupported, 0, 0, 0, 0, 0, // PC_VSEL
Unsupported, 0, 0, 0, 0, 0, // PC_VSLDOI
Unsupported, 0, 0, 0, 0, 0, // PC_VMR
Unsupported, 0, 0, 0, 0, 0, // PC_VMRP
IU, 0, 0, 0, 0, 0, // PC_SLE
IU, 0, 0, 0, 0, 0, // PC_SLEQ
IU, 0, 0, 0, 0, 0, // PC_SLIQ
IU, 0, 0, 0, 0, 0, // PC_SLLIQ
IU, 0, 0, 0, 0, 0, // PC_SLLQ
IU, 0, 0, 0, 0, 0, // PC_SLQ
IU, 0, 0, 0, 0, 0, // PC_SRAIQ
IU, 0, 0, 0, 0, 0, // PC_SRAQ
IU, 0, 0, 0, 0, 0, // PC_SRE
IU, 0, 0, 0, 0, 0, // PC_SREA
IU, 0, 0, 0, 0, 0, // PC_SREQ
IU, 0, 0, 0, 0, 0, // PC_SRIQ
IU, 0, 0, 0, 0, 0, // PC_SRLIQ
IU, 0, 0, 0, 0, 0, // PC_SRLQ
IU, 0, 0, 0, 0, 0, // PC_SRQ
IU, 0, 0, 0, 0, 0, // PC_MASKG
IU, 0, 0, 0, 0, 0, // PC_MASKIR
IU, 0, 0, 0, 0, 0, // PC_LSCBX
IU, 0, 0, 0, 0, 0, // PC_DIV
IU, 0, 0, 0, 0, 0, // PC_DIVS
IU, 0, 0, 0, 0, 0, // PC_DOZ
IU, 0, 0, 0, 0, 0, // PC_MUL
IU, 0, 0, 0, 0, 0, // PC_NABS
IU, 0, 0, 0, 0, 0, // PC_ABS
IU, 0, 0, 0, 0, 0, // PC_CLCS
IU, 0, 0, 0, 0, 0, // PC_DOZI
IU, 0, 0, 0, 0, 0, // PC_RLMI
IU, 0, 0, 0, 0, 0, // PC_RRIB
};
// Moves the instruction occupying oldStage into newStage.
//   firstStage - the stage whose duration is stored in cycles[0]; the new
//                stage's duration is read from cycles[newStage - firstStage]
//   oldStage   - stage currently holding the instruction (emptied on return)
//   newStage   - stage that receives the instruction
// The caller must ensure oldStage holds an instruction.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *instr = pipeline[oldStage].instr;
    int cycles = instruction_timing[instr->op].cycles[newStage - firstStage];

    pipeline[newStage].instr = instr;
    pipeline[newStage].remaining = cycles;
    pipeline[oldStage].instr = NULL;
}
// Finishes the instruction in the given stage by emptying that pipeline slot.
static void complete_instruction(int stage) {
pipeline[stage].instr = NULL;
}
// Returns the scheduling latency of instr: the table latency for its opcode,
// plus 2 when fRecordBit is set in its flag set, plus (argCount - 2) for
// PC_LMW and PC_STMW.
static int latency(PCode *instr) {
    int total;

    total = instruction_timing[instr->op].latency;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit) {
        total += 2;
    }

    // load/store multiple cost scales with the operand count
    if (instr->op == PC_LMW) {
        total += instr->argCount - 2;
    } else if (instr->op == PC_STMW) {
        total += instr->argCount - 2;
    }

    return total;
}
// Resets the simulated pipeline so that every stage is empty.
static void initialize(void) {
    int slot = NumStages;

    while (slot-- > 0) {
        pipeline[slot].instr = NULL;
    }
}
// Returns 1 when the stage instr starts in is currently empty, 0 otherwise.
// Serialize instructions are checked against the IU slot.
// NOTE(review): an opcode whose table stage is Unsupported would index
// pipeline[] past NumStages here — confirm such opcodes never reach this model.
static int can_issue(PCode *instr) {
    int slot = instruction_timing[instr->op].stage;

    if (slot == Serialize) {
        slot = IU;
    }

    return pipeline[slot].instr ? 0 : 1;
}
// Places instr into the stage it starts in (IU for Serialize instructions)
// and loads that slot's countdown from cycles[IU], i.e. the first cycles entry.
static void issue(PCode *instr) {
    int slot = instruction_timing[instr->op].stage;
    int duration = instruction_timing[instr->op].cycles[IU];

    if (slot == Serialize) {
        slot = IU;
    }

    pipeline[slot].remaining = duration;
    pipeline[slot].instr = instr;
}
// Simulates one clock tick: counts every occupied stage down by one, retires
// instructions from the terminal stages, then shifts the FP chain forward.
static void advance_clock(void) {
int stage;
// tick: decrement the countdown of every occupied stage (never below zero)
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
// IU, FWA and BPU have no successor stage: a finished instruction leaves
if (pipeline[IU].instr && pipeline[IU].remaining == 0)
complete_instruction(IU);
if (pipeline[FWA].instr && pipeline[FWA].remaining == 0)
complete_instruction(FWA);
if (pipeline[BPU].instr && pipeline[BPU].remaining == 0)
complete_instruction(BPU);
// FP chain FD -> FPM -> FPA -> FWA is processed back to front, so a stage
// freed in this tick can accept its predecessor in the same tick and an
// instruction moves at most one stage per call. The first argument (1) is
// the numeric value of FD, the base used to index cycles[].
if (pipeline[FPA].instr && pipeline[FPA].remaining == 0 && !pipeline[FWA].instr)
advance(1, FPA, FWA);
if (pipeline[FPM].instr && pipeline[FPM].remaining == 0 && !pipeline[FPA].instr)
advance(1, FPM, FPA);
if (pipeline[FD].instr && pipeline[FD].remaining == 0 && !pipeline[FPM].instr)
advance(1, FD, FPM);
}
// Returns non-zero when the timing table marks instr's opcode as Serialize.
static int serializes(PCode *instr) {
    int initial = instruction_timing[instr->op].stage;

    return initial == Serialize;
}
// Scheduler description for the 601 model: three integer parameters followed
// by the callbacks defined in this file.
// NOTE(review): the meaning of the leading 2, 0, 0 depends on the MachineInfo
// field layout, which is not visible here — confirm against its declaration.
MachineInfo machine601 = {
2,
0,
0,
&latency,
&initialize,
&can_issue,
&issue,
&advance_clock,
&serializes,
&default_uses_vpermute_unit
};

View File

@@ -0,0 +1,626 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// this is actually for 603e, but i couldn't find the 603 doc
// https://www.nxp.com/docs/en/reference-manual/MPC603EUM.pdf
// Pipeline stages modelled by this scheduler; each enumerator before
// NumStages is an index into pipeline[].
typedef enum Stage {
BPU, // Branch Processing Unit
IU, // Integer Unit
LSU1, // Load/Store Unit
LSU2, // Load/Store Unit, second stage
FPU1, // Floating Point Unit
FPU2, // Floating Point Unit, second stage
FPU3, // Floating Point Unit, third stage
SRU, // System Register Unit
NumStages // number of pipeline stages (size of pipeline[])
} Stage;
// Simulated pipeline state: one slot per stage, indexed by Stage.
// A slot with instr == NULL is empty.
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Capacity of the completion queue (size of completionbuffers.entries[]).
enum {
MaxEntries = 5
};
// Completion queue: a circular buffer of MaxEntries slots. Instructions are
// entered at nextFreeSlot by assign_completion_buffer() and removed from
// nextToRetire by retire_instruction(); both indices wrap modulo MaxEntries.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the instruction occupying this entry (NULL when the entry is empty)
PCode *instr;
// set to 1 by complete_instruction() once the instruction has finished
int completed;
} entries[MaxEntries];
} completionbuffers;
// Per-opcode timing table, looked up as instruction_timing[instr->op].
// The initializer is written without inner braces, so each row supplies six
// values in member order:
//   stage, latency, cycles[0], cycles[1], cycles[2], serializes
// The trailing comment on each row names the opcode the row is written for.
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[3];
// does this instruction serialise?
char serializes;
} instruction_timing[OPCODE_MAX] = {
BPU, 0, 0, 0, 0, 1, // PC_B
BPU, 0, 0, 0, 0, 1, // PC_BL
BPU, 0, 0, 0, 0, 1, // PC_BC
BPU, 0, 0, 0, 0, 1, // PC_BCLR
BPU, 0, 0, 0, 0, 1, // PC_BCCTR
BPU, 0, 0, 0, 0, 1, // PC_BT
BPU, 0, 0, 0, 0, 1, // PC_BTLR
BPU, 0, 0, 0, 0, 1, // PC_BTCTR
BPU, 0, 0, 0, 0, 1, // PC_BF
BPU, 0, 0, 0, 0, 1, // PC_BFLR
BPU, 0, 0, 0, 0, 1, // PC_BFCTR
BPU, 0, 0, 0, 0, 1, // PC_BDNZ
BPU, 0, 0, 0, 0, 1, // PC_BDNZT
BPU, 0, 0, 0, 0, 1, // PC_BDNZF
BPU, 0, 0, 0, 0, 1, // PC_BDZ
BPU, 0, 0, 0, 0, 1, // PC_BDZT
BPU, 0, 0, 0, 0, 1, // PC_BDZF
BPU, 0, 0, 0, 0, 1, // PC_BLR
BPU, 0, 0, 0, 0, 1, // PC_BCTR
BPU, 0, 0, 0, 0, 1, // PC_BCTRL
BPU, 0, 0, 0, 0, 1, // PC_BLRL
LSU1, 2, 1, 1, 0, 0, // PC_LBZ
LSU1, 2, 1, 1, 0, 0, // PC_LBZU
LSU1, 2, 1, 1, 0, 0, // PC_LBZX
LSU1, 2, 1, 1, 0, 0, // PC_LBZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHZ
LSU1, 2, 1, 1, 0, 0, // PC_LHZU
LSU1, 2, 1, 1, 0, 0, // PC_LHZX
LSU1, 2, 1, 1, 0, 0, // PC_LHZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHA
LSU1, 2, 1, 1, 0, 0, // PC_LHAU
LSU1, 2, 1, 1, 0, 0, // PC_LHAX
LSU1, 2, 1, 1, 0, 0, // PC_LHAUX
LSU1, 2, 1, 1, 0, 0, // PC_LHBRX
LSU1, 2, 1, 1, 0, 0, // PC_LWZ
LSU1, 2, 1, 1, 0, 0, // PC_LWZU
LSU1, 2, 1, 1, 0, 0, // PC_LWZX
LSU1, 2, 1, 1, 0, 0, // PC_LWZUX
LSU1, 2, 1, 1, 0, 0, // PC_LWBRX
LSU1, 2, 1, 1, 0, 0, // PC_LMW
LSU1, 2, 1, 1, 0, 0, // PC_STB
LSU1, 2, 1, 1, 0, 0, // PC_STBU
LSU1, 2, 1, 1, 0, 0, // PC_STBX
LSU1, 2, 1, 1, 0, 0, // PC_STBUX
LSU1, 2, 1, 1, 0, 0, // PC_STH
LSU1, 2, 1, 1, 0, 0, // PC_STHU
LSU1, 2, 1, 1, 0, 0, // PC_STHX
LSU1, 2, 1, 1, 0, 0, // PC_STHUX
LSU1, 2, 1, 1, 0, 0, // PC_STHBRX
LSU1, 2, 1, 1, 0, 0, // PC_STW
LSU1, 2, 1, 1, 0, 0, // PC_STWU
LSU1, 2, 1, 1, 0, 0, // PC_STWX
LSU1, 2, 1, 1, 0, 0, // PC_STWUX
LSU1, 2, 1, 1, 0, 0, // PC_STWBRX
LSU1, 2, 1, 1, 0, 0, // PC_STMW
LSU1, 2, 1, 1, 0, 0, // PC_DCBF
LSU1, 2, 1, 1, 0, 0, // PC_DCBST
LSU1, 2, 1, 1, 0, 0, // PC_DCBT
LSU1, 2, 1, 1, 0, 0, // PC_DCBTST
LSU1, 2, 1, 1, 0, 0, // PC_DCBZ
IU, 1, 1, 0, 0, 0, // PC_ADD
IU, 1, 1, 0, 0, 0, // PC_ADDC
IU, 1, 1, 0, 0, 0, // PC_ADDE
IU, 1, 1, 0, 0, 0, // PC_ADDI
IU, 1, 1, 0, 0, 0, // PC_ADDIC
IU, 1, 1, 0, 0, 0, // PC_ADDICR
IU, 1, 1, 0, 0, 0, // PC_ADDIS
IU, 1, 1, 0, 0, 0, // PC_ADDME
IU, 1, 1, 0, 0, 0, // PC_ADDZE
IU, 37, 37, 0, 0, 0, // PC_DIVW
IU, 37, 37, 0, 0, 0, // PC_DIVWU
IU, 5, 5, 0, 0, 0, // PC_MULHW
IU, 5, 5, 0, 0, 0, // PC_MULHWU
IU, 3, 3, 0, 0, 0, // PC_MULLI
IU, 5, 5, 0, 0, 0, // PC_MULLW
IU, 1, 1, 0, 0, 0, // PC_NEG
IU, 1, 1, 0, 0, 0, // PC_SUBF
IU, 1, 1, 0, 0, 0, // PC_SUBFC
IU, 1, 1, 0, 0, 0, // PC_SUBFE
IU, 1, 1, 0, 0, 0, // PC_SUBFIC
IU, 1, 1, 0, 0, 0, // PC_SUBFME
IU, 1, 1, 0, 0, 0, // PC_SUBFZE
IU, 3, 1, 0, 0, 0, // PC_CMPI
IU, 3, 1, 0, 0, 0, // PC_CMP
IU, 3, 1, 0, 0, 0, // PC_CMPLI
IU, 3, 1, 0, 0, 0, // PC_CMPL
IU, 1, 1, 0, 0, 0, // PC_ANDI
IU, 1, 1, 0, 0, 0, // PC_ANDIS
IU, 1, 1, 0, 0, 0, // PC_ORI
IU, 1, 1, 0, 0, 0, // PC_ORIS
IU, 1, 1, 0, 0, 0, // PC_XORI
IU, 1, 1, 0, 0, 0, // PC_XORIS
IU, 1, 1, 0, 0, 0, // PC_AND
IU, 1, 1, 0, 0, 0, // PC_OR
IU, 1, 1, 0, 0, 0, // PC_XOR
IU, 1, 1, 0, 0, 0, // PC_NAND
IU, 1, 1, 0, 0, 0, // PC_NOR
IU, 1, 1, 0, 0, 0, // PC_EQV
IU, 1, 1, 0, 0, 0, // PC_ANDC
IU, 1, 1, 0, 0, 0, // PC_ORC
IU, 1, 1, 0, 0, 0, // PC_EXTSB
IU, 1, 1, 0, 0, 0, // PC_EXTSH
IU, 1, 1, 0, 0, 0, // PC_CNTLZW
IU, 1, 1, 0, 0, 0, // PC_RLWINM
IU, 1, 1, 0, 0, 0, // PC_RLWNM
IU, 1, 1, 0, 0, 0, // PC_RLWIMI
IU, 1, 1, 0, 0, 0, // PC_SLW
IU, 1, 1, 0, 0, 0, // PC_SRW
IU, 1, 1, 0, 0, 0, // PC_SRAWI
IU, 1, 1, 0, 0, 0, // PC_SRAW
SRU, 1, 1, 0, 0, 0, // PC_CRAND
SRU, 1, 1, 0, 0, 0, // PC_CRANDC
SRU, 1, 1, 0, 0, 0, // PC_CREQV
SRU, 1, 1, 0, 0, 0, // PC_CRNAND
SRU, 1, 1, 0, 0, 0, // PC_CRNOR
SRU, 1, 1, 0, 0, 0, // PC_CROR
SRU, 1, 1, 0, 0, 0, // PC_CRORC
SRU, 1, 1, 0, 0, 0, // PC_CRXOR
SRU, 1, 1, 0, 0, 0, // PC_MCRF
SRU, 2, 2, 0, 0, 0, // PC_MTXER
SRU, 2, 2, 0, 0, 0, // PC_MTCTR
SRU, 2, 2, 0, 0, 0, // PC_MTLR
SRU, 1, 1, 0, 0, 0, // PC_MTCRF
SRU, 1, 1, 0, 0, 1, // PC_MTMSR
SRU, 1, 1, 0, 0, 1, // PC_MTSPR
SRU, 1, 1, 0, 0, 1, // PC_MFMSR
SRU, 1, 1, 0, 0, 1, // PC_MFSPR
SRU, 1, 1, 0, 0, 0, // PC_MFXER
SRU, 1, 1, 0, 0, 0, // PC_MFCTR
SRU, 1, 1, 0, 0, 0, // PC_MFLR
SRU, 1, 1, 0, 0, 0, // PC_MFCR
FPU1, 3, 1, 1, 1, 0, // PC_MFFS
FPU1, 3, 1, 1, 1, 0, // PC_MTFSF
SRU, 1, 1, 0, 0, 1, // PC_EIEIO
SRU, 1, 1, 0, 0, 1, // PC_ISYNC
SRU, 1, 1, 0, 0, 1, // PC_SYNC
SRU, 1, 1, 0, 0, 1, // PC_RFI
IU, 1, 1, 0, 0, 0, // PC_LI
IU, 1, 1, 0, 0, 0, // PC_LIS
IU, 1, 1, 0, 0, 0, // PC_MR
IU, 1, 1, 0, 0, 0, // PC_NOP
IU, 1, 1, 0, 0, 0, // PC_NOT
LSU1, 2, 1, 1, 0, 0, // PC_LFS
LSU1, 2, 1, 1, 0, 0, // PC_LFSU
LSU1, 2, 1, 1, 0, 0, // PC_LFSX
LSU1, 2, 1, 1, 0, 0, // PC_LFSUX
LSU1, 2, 1, 1, 0, 0, // PC_LFD
LSU1, 2, 1, 1, 0, 0, // PC_LFDU
LSU1, 2, 1, 1, 0, 0, // PC_LFDX
LSU1, 2, 1, 1, 0, 0, // PC_LFDUX
LSU1, 2, 1, 1, 0, 0, // PC_STFS
LSU1, 2, 1, 1, 0, 0, // PC_STFSU
LSU1, 2, 1, 1, 0, 0, // PC_STFSX
LSU1, 2, 1, 1, 0, 0, // PC_STFSUX
LSU1, 2, 1, 1, 0, 0, // PC_STFD
LSU1, 2, 1, 1, 0, 0, // PC_STFDU
LSU1, 2, 1, 1, 0, 0, // PC_STFDX
LSU1, 2, 1, 1, 0, 0, // PC_STFDUX
FPU1, 3, 1, 1, 1, 0, // PC_FMR
FPU1, 3, 1, 1, 1, 0, // PC_FABS
FPU1, 3, 1, 1, 1, 0, // PC_FNEG
FPU1, 3, 1, 1, 1, 0, // PC_FNABS
FPU1, 3, 1, 1, 1, 0, // PC_FADD
FPU1, 3, 1, 1, 1, 0, // PC_FADDS
FPU1, 3, 1, 1, 1, 0, // PC_FSUB
FPU1, 3, 1, 1, 1, 0, // PC_FSUBS
FPU1, 4, 2, 1, 1, 0, // PC_FMUL
FPU1, 3, 1, 1, 1, 0, // PC_FMULS
FPU1, 33, 33, 0, 0, 0, // PC_FDIV
FPU1, 18, 18, 0, 0, 0, // PC_FDIVS
FPU1, 4, 2, 1, 1, 0, // PC_FMADD
FPU1, 3, 1, 1, 1, 0, // PC_FMADDS
FPU1, 4, 2, 1, 1, 0, // PC_FMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS
FPU1, 4, 2, 1, 1, 0, // PC_FNMADD
FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS
FPU1, 4, 2, 1, 1, 0, // PC_FNMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS
FPU1, 18, 18, 0, 0, 0, // PC_FRES
FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE
FPU1, 3, 1, 1, 1, 0, // PC_FSEL
FPU1, 3, 1, 1, 1, 0, // PC_FRSP
FPU1, 3, 1, 1, 1, 0, // PC_FCTIW
FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ
FPU1, 5, 1, 1, 1, 0, // PC_FCMPU
FPU1, 5, 1, 1, 1, 0, // PC_FCMPO
LSU1, 1, 1, 0, 0, 0, // PC_LWARX
LSU1, 1, 1, 0, 0, 0, // PC_LSWI
LSU1, 1, 1, 0, 0, 0, // PC_LSWX
LSU1, 1, 1, 0, 0, 0, // PC_STFIWX
LSU1, 1, 1, 0, 0, 0, // PC_STSWI
LSU1, 1, 1, 0, 0, 0, // PC_STSWX
LSU1, 1, 1, 0, 0, 0, // PC_STWCX
IU, 1, 1, 0, 0, 1, // PC_ECIWX
IU, 1, 1, 0, 0, 1, // PC_ECOWX
IU, 1, 1, 0, 0, 0, // PC_DCBI
IU, 1, 1, 0, 0, 0, // PC_ICBI
IU, 1, 1, 0, 0, 0, // PC_MCRFS
IU, 1, 1, 0, 0, 0, // PC_MCRXR
IU, 1, 1, 0, 0, 0, // PC_MFTB
IU, 1, 1, 0, 0, 0, // PC_MFSR
IU, 1, 1, 0, 0, 0, // PC_MTSR
IU, 1, 1, 0, 0, 0, // PC_MFSRIN
IU, 1, 1, 0, 0, 0, // PC_MTSRIN
IU, 1, 1, 0, 0, 0, // PC_MTFSB0
IU, 1, 1, 0, 0, 0, // PC_MTFSB1
IU, 1, 1, 0, 0, 0, // PC_MTFSFI
IU, 1, 1, 0, 0, 1, // PC_SC
FPU1, 1, 1, 0, 0, 0, // PC_FSQRT
FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS
IU, 1, 1, 0, 0, 0, // PC_TLBIA
IU, 1, 1, 0, 0, 0, // PC_TLBIE
IU, 1, 1, 0, 0, 0, // PC_TLBLD
IU, 1, 1, 0, 0, 0, // PC_TLBLI
IU, 1, 1, 0, 0, 0, // PC_TLBSYNC
IU, 1, 1, 0, 0, 1, // PC_TW
IU, 1, 1, 0, 0, 1, // PC_TRAP
IU, 1, 1, 0, 0, 1, // PC_TWI
IU, 1, 1, 0, 0, 1, // PC_OPWORD
IU, 1, 1, 0, 0, 0, // PC_MFROM
IU, 1, 1, 0, 0, 1, // PC_DSA
IU, 1, 1, 0, 0, 1, // PC_ESA
IU, 0, 0, 0, 0, 0, // PC_DCCCI
IU, 0, 0, 0, 0, 0, // PC_DCREAD
IU, 0, 0, 0, 0, 0, // PC_ICBT
IU, 0, 0, 0, 0, 0, // PC_ICCCI
IU, 0, 0, 0, 0, 0, // PC_ICREAD
IU, 0, 0, 0, 0, 0, // PC_RFCI
IU, 0, 0, 0, 0, 0, // PC_TLBRE
IU, 0, 0, 0, 0, 0, // PC_TLBSX
IU, 0, 0, 0, 0, 0, // PC_TLBWE
IU, 0, 0, 0, 0, 0, // PC_WRTEE
IU, 0, 0, 0, 0, 0, // PC_WRTEEI
IU, 0, 0, 0, 0, 0, // PC_MFDCR
IU, 0, 0, 0, 0, 0, // PC_MTDCR
IU, 0, 0, 0, 0, 0, // PC_DCBA
BPU, 0, 0, 0, 0, 0, // PC_DSS
BPU, 0, 0, 0, 0, 0, // PC_DSSALL
BPU, 0, 0, 0, 0, 0, // PC_DST
BPU, 0, 0, 0, 0, 0, // PC_DSTT
BPU, 0, 0, 0, 0, 0, // PC_DSTST
BPU, 0, 0, 0, 0, 0, // PC_DSTSTT
BPU, 0, 0, 0, 0, 0, // PC_LVEBX
BPU, 0, 0, 0, 0, 0, // PC_LVEHX
BPU, 0, 0, 0, 0, 0, // PC_LVEWX
BPU, 0, 0, 0, 0, 0, // PC_LVSL
BPU, 0, 0, 0, 0, 0, // PC_LVSR
BPU, 0, 0, 0, 0, 0, // PC_LVX
BPU, 0, 0, 0, 0, 0, // PC_LVXL
BPU, 0, 0, 0, 0, 0, // PC_STVEBX
BPU, 0, 0, 0, 0, 0, // PC_STVEHX
BPU, 0, 0, 0, 0, 0, // PC_STVEWX
BPU, 0, 0, 0, 0, 0, // PC_STVX
BPU, 0, 0, 0, 0, 0, // PC_STVXL
BPU, 0, 0, 0, 0, 0, // PC_MFVSCR
BPU, 0, 0, 0, 0, 0, // PC_MTVSCR
BPU, 0, 0, 0, 0, 0, // PC_VADDCUW
BPU, 0, 0, 0, 0, 0, // PC_VADDFP
BPU, 0, 0, 0, 0, 0, // PC_VADDSBS
BPU, 0, 0, 0, 0, 0, // PC_VADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VADDSWS
BPU, 0, 0, 0, 0, 0, // PC_VADDUBM
BPU, 0, 0, 0, 0, 0, // PC_VADDUBS
BPU, 0, 0, 0, 0, 0, // PC_VADDUHM
BPU, 0, 0, 0, 0, 0, // PC_VADDUHS
BPU, 0, 0, 0, 0, 0, // PC_VADDUWM
BPU, 0, 0, 0, 0, 0, // PC_VADDUWS
BPU, 0, 0, 0, 0, 0, // PC_VAND
BPU, 0, 0, 0, 0, 0, // PC_VANDC
BPU, 0, 0, 0, 0, 0, // PC_VAVGSB
BPU, 0, 0, 0, 0, 0, // PC_VAVGSH
BPU, 0, 0, 0, 0, 0, // PC_VAVGSW
BPU, 0, 0, 0, 0, 0, // PC_VAVGUB
BPU, 0, 0, 0, 0, 0, // PC_VAVGUH
BPU, 0, 0, 0, 0, 0, // PC_VAVGUW
BPU, 0, 0, 0, 0, 0, // PC_VCFSX
BPU, 0, 0, 0, 0, 0, // PC_VCFUX
BPU, 0, 0, 0, 0, 0, // PC_VCMPBFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUB
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUH
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUW
BPU, 0, 0, 0, 0, 0, // PC_VCMPGEFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSB
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSH
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSW
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUB
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUH
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUW
BPU, 0, 0, 0, 0, 0, // PC_VCTSXS
BPU, 0, 0, 0, 0, 0, // PC_VCTUXS
BPU, 0, 0, 0, 0, 0, // PC_VEXPTEFP
BPU, 0, 0, 0, 0, 0, // PC_VLOGEFP
BPU, 0, 0, 0, 0, 0, // PC_VMAXFP
BPU, 0, 0, 0, 0, 0, // PC_VMAXSB
BPU, 0, 0, 0, 0, 0, // PC_VMAXSH
BPU, 0, 0, 0, 0, 0, // PC_VMAXSW
BPU, 0, 0, 0, 0, 0, // PC_VMAXUB
BPU, 0, 0, 0, 0, 0, // PC_VMAXUH
BPU, 0, 0, 0, 0, 0, // PC_VMAXUW
BPU, 0, 0, 0, 0, 0, // PC_VMINFP
BPU, 0, 0, 0, 0, 0, // PC_VMINSB
BPU, 0, 0, 0, 0, 0, // PC_VMINSH
BPU, 0, 0, 0, 0, 0, // PC_VMINSW
BPU, 0, 0, 0, 0, 0, // PC_VMINUB
BPU, 0, 0, 0, 0, 0, // PC_VMINUH
BPU, 0, 0, 0, 0, 0, // PC_VMINUW
BPU, 0, 0, 0, 0, 0, // PC_VMRGHB
BPU, 0, 0, 0, 0, 0, // PC_VMRGHH
BPU, 0, 0, 0, 0, 0, // PC_VMRGHW
BPU, 0, 0, 0, 0, 0, // PC_VMRGLB
BPU, 0, 0, 0, 0, 0, // PC_VMRGLH
BPU, 0, 0, 0, 0, 0, // PC_VMRGLW
BPU, 0, 0, 0, 0, 0, // PC_VMULESB
BPU, 0, 0, 0, 0, 0, // PC_VMULESH
BPU, 0, 0, 0, 0, 0, // PC_VMULEUB
BPU, 0, 0, 0, 0, 0, // PC_VMULEUH
BPU, 0, 0, 0, 0, 0, // PC_VMULOSB
BPU, 0, 0, 0, 0, 0, // PC_VMULOSH
BPU, 0, 0, 0, 0, 0, // PC_VMULOUB
BPU, 0, 0, 0, 0, 0, // PC_VMULOUH
BPU, 0, 0, 0, 0, 0, // PC_VNOR
BPU, 0, 0, 0, 0, 0, // PC_VOR
BPU, 0, 0, 0, 0, 0, // PC_VPKPX
BPU, 0, 0, 0, 0, 0, // PC_VPKSHSS
BPU, 0, 0, 0, 0, 0, // PC_VPKSHUS
BPU, 0, 0, 0, 0, 0, // PC_VPKSWSS
BPU, 0, 0, 0, 0, 0, // PC_VPKSWUS
BPU, 0, 0, 0, 0, 0, // PC_VPKUHUM
BPU, 0, 0, 0, 0, 0, // PC_VPKUHUS
BPU, 0, 0, 0, 0, 0, // PC_VPKUWUM
BPU, 0, 0, 0, 0, 0, // PC_VPKUWUS
BPU, 0, 0, 0, 0, 0, // PC_VREFP
BPU, 0, 0, 0, 0, 0, // PC_VRFIM
BPU, 0, 0, 0, 0, 0, // PC_VRFIN
BPU, 0, 0, 0, 0, 0, // PC_VRFIP
BPU, 0, 0, 0, 0, 0, // PC_VRFIZ
BPU, 0, 0, 0, 0, 0, // PC_VRLB
BPU, 0, 0, 0, 0, 0, // PC_VRLH
BPU, 0, 0, 0, 0, 0, // PC_VRLW
BPU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP
BPU, 0, 0, 0, 0, 0, // PC_VSL
BPU, 0, 0, 0, 0, 0, // PC_VSLB
BPU, 0, 0, 0, 0, 0, // PC_VSLH
BPU, 0, 0, 0, 0, 0, // PC_VSLO
BPU, 0, 0, 0, 0, 0, // PC_VSLW
BPU, 0, 0, 0, 0, 0, // PC_VSPLTB
BPU, 0, 0, 0, 0, 0, // PC_VSPLTH
BPU, 0, 0, 0, 0, 0, // PC_VSPLTW
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISB
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISH
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISW
BPU, 0, 0, 0, 0, 0, // PC_VSR
BPU, 0, 0, 0, 0, 0, // PC_VSRAB
BPU, 0, 0, 0, 0, 0, // PC_VSRAH
BPU, 0, 0, 0, 0, 0, // PC_VSRAW
BPU, 0, 0, 0, 0, 0, // PC_VSRB
BPU, 0, 0, 0, 0, 0, // PC_VSRH
BPU, 0, 0, 0, 0, 0, // PC_VSRO
BPU, 0, 0, 0, 0, 0, // PC_VSRW
BPU, 0, 0, 0, 0, 0, // PC_VSUBCUW
BPU, 0, 0, 0, 0, 0, // PC_VSUBFP
BPU, 0, 0, 0, 0, 0, // PC_VSUBSBS
BPU, 0, 0, 0, 0, 0, // PC_VSUBSHS
BPU, 0, 0, 0, 0, 0, // PC_VSUBSWS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUBM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUBS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUHM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUHS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUWM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUWS
BPU, 0, 0, 0, 0, 0, // PC_VSUMSWS
BPU, 0, 0, 0, 0, 0, // PC_VSUM2SWS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4SBS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4SHS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4UBS
BPU, 0, 0, 0, 0, 0, // PC_VUPKHPX
BPU, 0, 0, 0, 0, 0, // PC_VUPKHSB
BPU, 0, 0, 0, 0, 0, // PC_VUPKHSH
BPU, 0, 0, 0, 0, 0, // PC_VUPKLPX
BPU, 0, 0, 0, 0, 0, // PC_VUPKLSB
BPU, 0, 0, 0, 0, 0, // PC_VUPKLSH
BPU, 0, 0, 0, 0, 0, // PC_VXOR
BPU, 0, 0, 0, 0, 0, // PC_VMADDFP
BPU, 0, 0, 0, 0, 0, // PC_VMHADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VMLADDUHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMMBM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHS
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUBM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHS
BPU, 0, 0, 0, 0, 0, // PC_VNMSUBFP
BPU, 0, 0, 0, 0, 0, // PC_VPERM
BPU, 0, 0, 0, 0, 0, // PC_VSEL
BPU, 0, 0, 0, 0, 0, // PC_VSLDOI
BPU, 0, 0, 0, 0, 0, // PC_VMR
BPU, 0, 0, 0, 0, 0, // PC_VMRP
BPU, 0, 0, 0, 0, 0, // PC_SLE
BPU, 0, 0, 0, 0, 0, // PC_SLEQ
BPU, 0, 0, 0, 0, 0, // PC_SLIQ
BPU, 0, 0, 0, 0, 0, // PC_SLLIQ
BPU, 0, 0, 0, 0, 0, // PC_SLLQ
BPU, 0, 0, 0, 0, 0, // PC_SLQ
BPU, 0, 0, 0, 0, 0, // PC_SRAIQ
BPU, 0, 0, 0, 0, 0, // PC_SRAQ
BPU, 0, 0, 0, 0, 0, // PC_SRE
BPU, 0, 0, 0, 0, 0, // PC_SREA
BPU, 0, 0, 0, 0, 0, // PC_SREQ
BPU, 0, 0, 0, 0, 0, // PC_SRIQ
BPU, 0, 0, 0, 0, 0, // PC_SRLIQ
BPU, 0, 0, 0, 0, 0, // PC_SRLQ
BPU, 0, 0, 0, 0, 0, // PC_SRQ
BPU, 0, 0, 0, 0, 0, // PC_MASKG
BPU, 0, 0, 0, 0, 0, // PC_MASKIR
BPU, 0, 0, 0, 0, 0, // PC_LSCBX
BPU, 0, 0, 0, 0, 0, // PC_DIV
BPU, 0, 0, 0, 0, 0, // PC_DIVS
BPU, 0, 0, 0, 0, 0, // PC_DOZ
BPU, 0, 0, 0, 0, 0, // PC_MUL
BPU, 0, 0, 0, 0, 0, // PC_NABS
BPU, 0, 0, 0, 0, 0, // PC_ABS
BPU, 0, 0, 0, 0, 0, // PC_CLCS
BPU, 0, 0, 0, 0, 0, // PC_DOZI
BPU, 0, 0, 0, 0, 0, // PC_RLMI
BPU, 0, 0, 0, 0, 0, // PC_RRIB
};
// Moves the instruction held in pipeline stage `oldStage` into `newStage`.
// The number of cycles it will spend in the new stage comes from its timing
// entry, indexed by how far `newStage` lies from `firstStage` (the first
// stage of the owning unit). `oldStage` is left empty afterwards.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline[oldStage].instr;
    int stageIndex = newStage - firstStage;

    pipeline[newStage].remaining = instruction_timing[moving->op].cycles[stageIndex];
    pipeline[newStage].instr = moving;
    // cleared last, exactly as before, so oldStage always ends up empty
    pipeline[oldStage].instr = NULL;
}
// Reserves the next free completion-queue entry for `instr`, marks it as not
// yet completed, and moves the free-slot cursor forward (wrapping around).
static void assign_completion_buffer(PCode *instr) {
    unsigned int slot = completionbuffers.nextFreeSlot;

    completionbuffers.entries[slot].instr = instr;
    completionbuffers.entries[slot].completed = 0;
    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;

    // one more entry in use, one fewer available
    completionbuffers.free--;
    completionbuffers.used++;
}
// Marks the instruction currently in pipeline stage `stage` as completed in
// the completion queue and empties that stage.
//
// The queue is searched linearly for the entry holding the instruction. If no
// entry matches, nothing is marked: the previous code wrote to
// entries[MaxEntries] in that case, which is one past the end of the array.
static void complete_instruction(int stage) {
    PCode *instr = pipeline[stage].instr;
    int buf;

    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr) {
            completionbuffers.entries[buf].completed = 1;
            break;
        }
    }

    pipeline[stage].instr = NULL;
}
// Releases the completion-queue entry at the retire cursor and advances the
// cursor to the following entry (wrapping around).
static void retire_instruction(void) {
    unsigned int oldest = completionbuffers.nextToRetire;

    completionbuffers.entries[oldest].instr = NULL;
    completionbuffers.nextToRetire = (oldest + 1) % MaxEntries;

    // one fewer entry in use, one more available
    completionbuffers.free++;
    completionbuffers.used--;
}
// Returns the latency of `instr`: the base value from the timing table, plus
// 2 when the record bit is set, plus (argCount - 2) for LMW/STMW.
static int latency(PCode *instr) {
    int extra = 0;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        extra += 2;

    switch (instr->op) {
    case PC_LMW:
    case PC_STMW:
        // load/store multiple: cost grows with the operand count
        extra += instr->argCount - 2;
        break;
    default:
        break;
    }

    return instruction_timing[instr->op].latency + extra;
}
// Resets the simulated machine: every pipeline stage is emptied and the
// completion queue is returned to its empty state.
static void initialize(void) {
    int n;

    n = 0;
    while (n < NumStages) {
        pipeline[n].instr = NULL;
        n++;
    }

    n = 0;
    while (n < MaxEntries) {
        completionbuffers.entries[n].instr = NULL;
        n++;
    }

    completionbuffers.nextFreeSlot = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.used = 0;
    // NOTE(review): 5 is presumably the same value as MaxEntries - confirm
    completionbuffers.free = 5;
}
// Returns 1 when `instr` may be issued this cycle, 0 otherwise.
// Issue is refused when the completion queue is full, when the instruction's
// initial pipeline stage is occupied, or when `instr` is a write and the
// instruction in LSU2 is also a write.
static int can_issue(PCode *instr) {
    PCode *inLSU2;

    if (!completionbuffers.free)
        return 0;

    if (pipeline[instruction_timing[instr->op].stage].instr != NULL)
        return 0;

    inLSU2 = pipeline[LSU2].instr;
    if (inLSU2 && (instr->flags & fIsWrite) && (inLSU2->flags & fIsWrite))
        return 0;

    return 1;
}
// Issues `instr`: reserves a completion-queue entry for it and places it in
// its initial pipeline stage with that stage's cycle count.
static void issue(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;

    assign_completion_buffer(instr);

    pipeline[unit].remaining = instruction_timing[instr->op].cycles[0];
    pipeline[unit].instr = instr;
}
// Advances the simulated machine by one clock cycle.
// The steps below run in a fixed order and the order matters:
//   1. count down every occupied stage that still has cycles remaining
//   2. retire up to two completed instructions from the head of the queue
//   3. mark instructions that have finished their final stage as completed
//   4. move finished instructions into the next stage of their unit
static void advance_clock(void) {
int stage;
// 1. tick down each busy stage
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
// 2. retirement is in order and limited to two entries per call; it happens
// before step 3, so an instruction completed in this call is not retired
// until a later call
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
}
}
// 3. stages from which an instruction completes directly
if (pipeline[IU].instr && pipeline[IU].remaining == 0)
complete_instruction(IU);
if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0)
complete_instruction(LSU2);
if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0)
complete_instruction(FPU3);
if (pipeline[SRU].instr && pipeline[SRU].remaining == 0)
complete_instruction(SRU);
if (pipeline[BPU].instr && pipeline[BPU].remaining == 0)
complete_instruction(BPU);
// FDIV/FDIVS complete straight from FPU1 instead of moving on to FPU2/FPU3
if (
pipeline[FPU1].instr &&
pipeline[FPU1].remaining == 0 &&
(pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS)
)
complete_instruction(FPU1);
// 4. later stages are handled first, so a stage vacated here can be refilled
// from the preceding stage within the same call
if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr)
advance(FPU1, FPU2, FPU3);
if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr)
advance(FPU1, FPU1, FPU2);
if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr)
advance(LSU1, LSU1, LSU2);
}
// Returns the `serializes` flag stored in the timing table for the opcode of
// `instr` (non-zero when the instruction is marked as serializing).
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
// Machine description exported by this file: three numeric parameters
// followed by the scheduler callbacks defined above.
// NOTE(review): the meaning of the three leading integers is given by the
// MachineInfo declaration, which is not visible here - confirm there.
MachineInfo machine603 = {
    2,
    1,
    0,
    latency,
    initialize,
    can_issue,
    issue,
    advance_clock,
    serializes,
    default_uses_vpermute_unit
};

View File

@@ -0,0 +1,650 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://www.nxp.com/docs/en/reference-manual/MPC603EUM.pdf
// Pipeline stages modelled for this CPU. The values index the `pipeline`
// array; NumStages is the number of stages and must stay last.
typedef enum Stage {
BPU, // Branch Processing Unit
IU, // Integer Unit
LSU1, // Load/Store Unit, first stage
LSU2, // Load/Store Unit, second stage
FPU1, // Floating Point Unit, first stage
FPU2, // Floating Point Unit, second stage
FPU3, // Floating Point Unit, third stage
SRU, // System Register Unit
NumStages
} Stage;
// Current occupancy of every pipeline stage, indexed by Stage.
// A stage is empty when its `instr` is NULL.
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Capacity of the completion queue (size of completionbuffers.entries).
enum {
MaxEntries = 5
};
// Completion queue: a circular buffer of issued instructions. Entries are
// handed out at `nextFreeSlot` and released at `nextToRetire`; both cursors
// wrap modulo MaxEntries.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the instruction occupying this entry (NULL once retired)
PCode *instr;
// non-zero once the instruction has finished executing
int completed;
} entries[MaxEntries];
} completionbuffers;
// Per-opcode timing table, indexed by PCode opcode (instr->op).
// Each row is written as a flat list of six values, in field order:
//   stage, latency, cycles[0], cycles[1], cycles[2], serializes
// The trailing comment on each row names the opcode it belongs to; rows must
// stay in opcode order.
// NOTE(review): the all-zero BPU rows towards the end look like placeholders
// for opcodes this CPU model does not schedule - confirm.
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage (indexed by distance from the initial stage)
char cycles[3];
// does this instruction serialise?
char serializes;
} instruction_timing[OPCODE_MAX] = {
BPU, 0, 0, 0, 0, 1, // PC_B
BPU, 0, 0, 0, 0, 1, // PC_BL
BPU, 0, 0, 0, 0, 1, // PC_BC
BPU, 0, 0, 0, 0, 1, // PC_BCLR
BPU, 0, 0, 0, 0, 1, // PC_BCCTR
BPU, 0, 0, 0, 0, 1, // PC_BT
BPU, 0, 0, 0, 0, 1, // PC_BTLR
BPU, 0, 0, 0, 0, 1, // PC_BTCTR
BPU, 0, 0, 0, 0, 1, // PC_BF
BPU, 0, 0, 0, 0, 1, // PC_BFLR
BPU, 0, 0, 0, 0, 1, // PC_BFCTR
BPU, 0, 0, 0, 0, 1, // PC_BDNZ
BPU, 0, 0, 0, 0, 1, // PC_BDNZT
BPU, 0, 0, 0, 0, 1, // PC_BDNZF
BPU, 0, 0, 0, 0, 1, // PC_BDZ
BPU, 0, 0, 0, 0, 1, // PC_BDZT
BPU, 0, 0, 0, 0, 1, // PC_BDZF
BPU, 0, 0, 0, 0, 1, // PC_BLR
BPU, 0, 0, 0, 0, 1, // PC_BCTR
BPU, 0, 0, 0, 0, 1, // PC_BCTRL
BPU, 0, 0, 0, 0, 1, // PC_BLRL
LSU1, 2, 1, 1, 0, 0, // PC_LBZ
LSU1, 2, 1, 1, 0, 0, // PC_LBZU
LSU1, 2, 1, 1, 0, 0, // PC_LBZX
LSU1, 2, 1, 1, 0, 0, // PC_LBZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHZ
LSU1, 2, 1, 1, 0, 0, // PC_LHZU
LSU1, 2, 1, 1, 0, 0, // PC_LHZX
LSU1, 2, 1, 1, 0, 0, // PC_LHZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHA
LSU1, 2, 1, 1, 0, 0, // PC_LHAU
LSU1, 2, 1, 1, 0, 0, // PC_LHAX
LSU1, 2, 1, 1, 0, 0, // PC_LHAUX
LSU1, 2, 1, 1, 0, 0, // PC_LHBRX
LSU1, 2, 1, 1, 0, 0, // PC_LWZ
LSU1, 2, 1, 1, 0, 0, // PC_LWZU
LSU1, 2, 1, 1, 0, 0, // PC_LWZX
LSU1, 2, 1, 1, 0, 0, // PC_LWZUX
LSU1, 2, 1, 1, 0, 0, // PC_LWBRX
LSU1, 2, 1, 1, 0, 0, // PC_LMW
LSU1, 2, 1, 1, 0, 0, // PC_STB
LSU1, 2, 1, 1, 0, 0, // PC_STBU
LSU1, 2, 1, 1, 0, 0, // PC_STBX
LSU1, 2, 1, 1, 0, 0, // PC_STBUX
LSU1, 2, 1, 1, 0, 0, // PC_STH
LSU1, 2, 1, 1, 0, 0, // PC_STHU
LSU1, 2, 1, 1, 0, 0, // PC_STHX
LSU1, 2, 1, 1, 0, 0, // PC_STHUX
LSU1, 2, 1, 1, 0, 0, // PC_STHBRX
LSU1, 2, 1, 1, 0, 0, // PC_STW
LSU1, 2, 1, 1, 0, 0, // PC_STWU
LSU1, 2, 1, 1, 0, 0, // PC_STWX
LSU1, 2, 1, 1, 0, 0, // PC_STWUX
LSU1, 2, 1, 1, 0, 0, // PC_STWBRX
LSU1, 2, 1, 1, 0, 0, // PC_STMW
LSU1, 2, 1, 1, 0, 0, // PC_DCBF
LSU1, 2, 1, 1, 0, 0, // PC_DCBST
LSU1, 2, 1, 1, 0, 0, // PC_DCBT
LSU1, 2, 1, 1, 0, 0, // PC_DCBTST
LSU1, 2, 1, 1, 0, 0, // PC_DCBZ
IU, 1, 1, 0, 0, 0, // PC_ADD
IU, 1, 1, 0, 0, 0, // PC_ADDC
IU, 1, 1, 0, 0, 0, // PC_ADDE
IU, 1, 1, 0, 0, 0, // PC_ADDI
IU, 1, 1, 0, 0, 0, // PC_ADDIC
IU, 1, 1, 0, 0, 0, // PC_ADDICR
IU, 1, 1, 0, 0, 0, // PC_ADDIS
IU, 1, 1, 0, 0, 0, // PC_ADDME
IU, 1, 1, 0, 0, 0, // PC_ADDZE
IU, 37, 37, 0, 0, 0, // PC_DIVW
IU, 37, 37, 0, 0, 0, // PC_DIVWU
IU, 5, 5, 0, 0, 0, // PC_MULHW
IU, 5, 5, 0, 0, 0, // PC_MULHWU
IU, 3, 3, 0, 0, 0, // PC_MULLI
IU, 5, 5, 0, 0, 0, // PC_MULLW
IU, 1, 1, 0, 0, 0, // PC_NEG
IU, 1, 1, 0, 0, 0, // PC_SUBF
IU, 1, 1, 0, 0, 0, // PC_SUBFC
IU, 1, 1, 0, 0, 0, // PC_SUBFE
IU, 1, 1, 0, 0, 0, // PC_SUBFIC
IU, 1, 1, 0, 0, 0, // PC_SUBFME
IU, 1, 1, 0, 0, 0, // PC_SUBFZE
IU, 3, 1, 0, 0, 0, // PC_CMPI
IU, 3, 1, 0, 0, 0, // PC_CMP
IU, 3, 1, 0, 0, 0, // PC_CMPLI
IU, 3, 1, 0, 0, 0, // PC_CMPL
IU, 1, 1, 0, 0, 0, // PC_ANDI
IU, 1, 1, 0, 0, 0, // PC_ANDIS
IU, 1, 1, 0, 0, 0, // PC_ORI
IU, 1, 1, 0, 0, 0, // PC_ORIS
IU, 1, 1, 0, 0, 0, // PC_XORI
IU, 1, 1, 0, 0, 0, // PC_XORIS
IU, 1, 1, 0, 0, 0, // PC_AND
IU, 1, 1, 0, 0, 0, // PC_OR
IU, 1, 1, 0, 0, 0, // PC_XOR
IU, 1, 1, 0, 0, 0, // PC_NAND
IU, 1, 1, 0, 0, 0, // PC_NOR
IU, 1, 1, 0, 0, 0, // PC_EQV
IU, 1, 1, 0, 0, 0, // PC_ANDC
IU, 1, 1, 0, 0, 0, // PC_ORC
IU, 1, 1, 0, 0, 0, // PC_EXTSB
IU, 1, 1, 0, 0, 0, // PC_EXTSH
IU, 1, 1, 0, 0, 0, // PC_CNTLZW
IU, 1, 1, 0, 0, 0, // PC_RLWINM
IU, 1, 1, 0, 0, 0, // PC_RLWNM
IU, 1, 1, 0, 0, 0, // PC_RLWIMI
IU, 1, 1, 0, 0, 0, // PC_SLW
IU, 1, 1, 0, 0, 0, // PC_SRW
IU, 1, 1, 0, 0, 0, // PC_SRAWI
IU, 1, 1, 0, 0, 0, // PC_SRAW
SRU, 1, 1, 0, 0, 0, // PC_CRAND
SRU, 1, 1, 0, 0, 0, // PC_CRANDC
SRU, 1, 1, 0, 0, 0, // PC_CREQV
SRU, 1, 1, 0, 0, 0, // PC_CRNAND
SRU, 1, 1, 0, 0, 0, // PC_CRNOR
SRU, 1, 1, 0, 0, 0, // PC_CROR
SRU, 1, 1, 0, 0, 0, // PC_CRORC
SRU, 1, 1, 0, 0, 0, // PC_CRXOR
SRU, 1, 1, 0, 0, 0, // PC_MCRF
SRU, 2, 2, 0, 0, 0, // PC_MTXER
SRU, 2, 2, 0, 0, 0, // PC_MTCTR
SRU, 2, 2, 0, 0, 0, // PC_MTLR
SRU, 1, 1, 0, 0, 0, // PC_MTCRF
SRU, 1, 1, 0, 0, 1, // PC_MTMSR
SRU, 1, 1, 0, 0, 1, // PC_MTSPR
SRU, 1, 1, 0, 0, 1, // PC_MFMSR
SRU, 1, 1, 0, 0, 1, // PC_MFSPR
SRU, 1, 1, 0, 0, 0, // PC_MFXER
SRU, 1, 1, 0, 0, 0, // PC_MFCTR
SRU, 1, 1, 0, 0, 0, // PC_MFLR
SRU, 1, 1, 0, 0, 0, // PC_MFCR
FPU1, 3, 1, 1, 1, 0, // PC_MFFS
FPU1, 3, 1, 1, 1, 0, // PC_MTFSF
SRU, 1, 1, 0, 0, 1, // PC_EIEIO
SRU, 1, 1, 0, 0, 1, // PC_ISYNC
SRU, 1, 1, 0, 0, 1, // PC_SYNC
SRU, 1, 1, 0, 0, 1, // PC_RFI
IU, 1, 1, 0, 0, 0, // PC_LI
IU, 1, 1, 0, 0, 0, // PC_LIS
IU, 1, 1, 0, 0, 0, // PC_MR
IU, 1, 1, 0, 0, 0, // PC_NOP
IU, 1, 1, 0, 0, 0, // PC_NOT
LSU1, 2, 1, 1, 0, 0, // PC_LFS
LSU1, 2, 1, 1, 0, 0, // PC_LFSU
LSU1, 2, 1, 1, 0, 0, // PC_LFSX
LSU1, 2, 1, 1, 0, 0, // PC_LFSUX
LSU1, 2, 1, 1, 0, 0, // PC_LFD
LSU1, 2, 1, 1, 0, 0, // PC_LFDU
LSU1, 2, 1, 1, 0, 0, // PC_LFDX
LSU1, 2, 1, 1, 0, 0, // PC_LFDUX
LSU1, 2, 1, 1, 0, 0, // PC_STFS
LSU1, 2, 1, 1, 0, 0, // PC_STFSU
LSU1, 2, 1, 1, 0, 0, // PC_STFSX
LSU1, 2, 1, 1, 0, 0, // PC_STFSUX
LSU1, 2, 1, 1, 0, 0, // PC_STFD
LSU1, 2, 1, 1, 0, 0, // PC_STFDU
LSU1, 2, 1, 1, 0, 0, // PC_STFDX
LSU1, 2, 1, 1, 0, 0, // PC_STFDUX
FPU1, 3, 1, 1, 1, 0, // PC_FMR
FPU1, 3, 1, 1, 1, 0, // PC_FABS
FPU1, 3, 1, 1, 1, 0, // PC_FNEG
FPU1, 3, 1, 1, 1, 0, // PC_FNABS
FPU1, 3, 1, 1, 1, 0, // PC_FADD
FPU1, 3, 1, 1, 1, 0, // PC_FADDS
FPU1, 3, 1, 1, 1, 0, // PC_FSUB
FPU1, 3, 1, 1, 1, 0, // PC_FSUBS
FPU1, 4, 2, 1, 1, 0, // PC_FMUL
FPU1, 3, 1, 1, 1, 0, // PC_FMULS
FPU1, 33, 33, 0, 0, 0, // PC_FDIV
FPU1, 18, 18, 0, 0, 0, // PC_FDIVS
FPU1, 4, 2, 1, 1, 0, // PC_FMADD
FPU1, 3, 1, 1, 1, 0, // PC_FMADDS
FPU1, 4, 2, 1, 1, 0, // PC_FMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS
FPU1, 4, 2, 1, 1, 0, // PC_FNMADD
FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS
FPU1, 4, 2, 1, 1, 0, // PC_FNMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS
FPU1, 18, 18, 0, 0, 0, // PC_FRES
FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE
FPU1, 3, 1, 1, 1, 0, // PC_FSEL
FPU1, 3, 1, 1, 1, 0, // PC_FRSP
FPU1, 3, 1, 1, 1, 0, // PC_FCTIW
FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ
FPU1, 5, 1, 1, 1, 0, // PC_FCMPU
FPU1, 5, 1, 1, 1, 0, // PC_FCMPO
LSU1, 1, 1, 0, 0, 0, // PC_LWARX
LSU1, 1, 1, 0, 0, 0, // PC_LSWI
LSU1, 1, 1, 0, 0, 0, // PC_LSWX
LSU1, 1, 1, 0, 0, 0, // PC_STFIWX
LSU1, 1, 1, 0, 0, 0, // PC_STSWI
LSU1, 1, 1, 0, 0, 0, // PC_STSWX
LSU1, 1, 1, 0, 0, 0, // PC_STWCX
IU, 1, 1, 0, 0, 1, // PC_ECIWX
IU, 1, 1, 0, 0, 1, // PC_ECOWX
IU, 1, 1, 0, 0, 0, // PC_DCBI
IU, 1, 1, 0, 0, 0, // PC_ICBI
IU, 1, 1, 0, 0, 0, // PC_MCRFS
IU, 1, 1, 0, 0, 0, // PC_MCRXR
IU, 1, 1, 0, 0, 0, // PC_MFTB
IU, 1, 1, 0, 0, 0, // PC_MFSR
IU, 1, 1, 0, 0, 0, // PC_MTSR
IU, 1, 1, 0, 0, 0, // PC_MFSRIN
IU, 1, 1, 0, 0, 0, // PC_MTSRIN
IU, 1, 1, 0, 0, 0, // PC_MTFSB0
IU, 1, 1, 0, 0, 0, // PC_MTFSB1
IU, 1, 1, 0, 0, 0, // PC_MTFSFI
IU, 1, 1, 0, 0, 1, // PC_SC
FPU1, 1, 1, 0, 0, 0, // PC_FSQRT
FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS
IU, 1, 1, 0, 0, 0, // PC_TLBIA
IU, 1, 1, 0, 0, 0, // PC_TLBIE
IU, 1, 1, 0, 0, 0, // PC_TLBLD
IU, 1, 1, 0, 0, 0, // PC_TLBLI
IU, 1, 1, 0, 0, 0, // PC_TLBSYNC
IU, 1, 1, 0, 0, 1, // PC_TW
IU, 1, 1, 0, 0, 1, // PC_TRAP
IU, 1, 1, 0, 0, 1, // PC_TWI
IU, 1, 1, 0, 0, 1, // PC_OPWORD
IU, 1, 1, 0, 0, 0, // PC_MFROM
IU, 1, 1, 0, 0, 1, // PC_DSA
IU, 1, 1, 0, 0, 1, // PC_ESA
IU, 0, 0, 0, 0, 0, // PC_DCCCI
IU, 0, 0, 0, 0, 0, // PC_DCREAD
IU, 0, 0, 0, 0, 0, // PC_ICBT
IU, 0, 0, 0, 0, 0, // PC_ICCCI
IU, 0, 0, 0, 0, 0, // PC_ICREAD
IU, 0, 0, 0, 0, 0, // PC_RFCI
IU, 0, 0, 0, 0, 0, // PC_TLBRE
IU, 0, 0, 0, 0, 0, // PC_TLBSX
IU, 0, 0, 0, 0, 0, // PC_TLBWE
IU, 0, 0, 0, 0, 0, // PC_WRTEE
IU, 0, 0, 0, 0, 0, // PC_WRTEEI
IU, 0, 0, 0, 0, 0, // PC_MFDCR
IU, 0, 0, 0, 0, 0, // PC_MTDCR
IU, 0, 0, 0, 0, 0, // PC_DCBA
BPU, 0, 0, 0, 0, 0, // PC_DSS
BPU, 0, 0, 0, 0, 0, // PC_DSSALL
BPU, 0, 0, 0, 0, 0, // PC_DST
BPU, 0, 0, 0, 0, 0, // PC_DSTT
BPU, 0, 0, 0, 0, 0, // PC_DSTST
BPU, 0, 0, 0, 0, 0, // PC_DSTSTT
BPU, 0, 0, 0, 0, 0, // PC_LVEBX
BPU, 0, 0, 0, 0, 0, // PC_LVEHX
BPU, 0, 0, 0, 0, 0, // PC_LVEWX
BPU, 0, 0, 0, 0, 0, // PC_LVSL
BPU, 0, 0, 0, 0, 0, // PC_LVSR
BPU, 0, 0, 0, 0, 0, // PC_LVX
BPU, 0, 0, 0, 0, 0, // PC_LVXL
BPU, 0, 0, 0, 0, 0, // PC_STVEBX
BPU, 0, 0, 0, 0, 0, // PC_STVEHX
BPU, 0, 0, 0, 0, 0, // PC_STVEWX
BPU, 0, 0, 0, 0, 0, // PC_STVX
BPU, 0, 0, 0, 0, 0, // PC_STVXL
BPU, 0, 0, 0, 0, 0, // PC_MFVSCR
BPU, 0, 0, 0, 0, 0, // PC_MTVSCR
BPU, 0, 0, 0, 0, 0, // PC_VADDCUW
BPU, 0, 0, 0, 0, 0, // PC_VADDFP
BPU, 0, 0, 0, 0, 0, // PC_VADDSBS
BPU, 0, 0, 0, 0, 0, // PC_VADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VADDSWS
BPU, 0, 0, 0, 0, 0, // PC_VADDUBM
BPU, 0, 0, 0, 0, 0, // PC_VADDUBS
BPU, 0, 0, 0, 0, 0, // PC_VADDUHM
BPU, 0, 0, 0, 0, 0, // PC_VADDUHS
BPU, 0, 0, 0, 0, 0, // PC_VADDUWM
BPU, 0, 0, 0, 0, 0, // PC_VADDUWS
BPU, 0, 0, 0, 0, 0, // PC_VAND
BPU, 0, 0, 0, 0, 0, // PC_VANDC
BPU, 0, 0, 0, 0, 0, // PC_VAVGSB
BPU, 0, 0, 0, 0, 0, // PC_VAVGSH
BPU, 0, 0, 0, 0, 0, // PC_VAVGSW
BPU, 0, 0, 0, 0, 0, // PC_VAVGUB
BPU, 0, 0, 0, 0, 0, // PC_VAVGUH
BPU, 0, 0, 0, 0, 0, // PC_VAVGUW
BPU, 0, 0, 0, 0, 0, // PC_VCFSX
BPU, 0, 0, 0, 0, 0, // PC_VCFUX
BPU, 0, 0, 0, 0, 0, // PC_VCMPBFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUB
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUH
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUW
BPU, 0, 0, 0, 0, 0, // PC_VCMPGEFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSB
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSH
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSW
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUB
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUH
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUW
BPU, 0, 0, 0, 0, 0, // PC_VCTSXS
BPU, 0, 0, 0, 0, 0, // PC_VCTUXS
BPU, 0, 0, 0, 0, 0, // PC_VEXPTEFP
BPU, 0, 0, 0, 0, 0, // PC_VLOGEFP
BPU, 0, 0, 0, 0, 0, // PC_VMAXFP
BPU, 0, 0, 0, 0, 0, // PC_VMAXSB
BPU, 0, 0, 0, 0, 0, // PC_VMAXSH
BPU, 0, 0, 0, 0, 0, // PC_VMAXSW
BPU, 0, 0, 0, 0, 0, // PC_VMAXUB
BPU, 0, 0, 0, 0, 0, // PC_VMAXUH
BPU, 0, 0, 0, 0, 0, // PC_VMAXUW
BPU, 0, 0, 0, 0, 0, // PC_VMINFP
BPU, 0, 0, 0, 0, 0, // PC_VMINSB
BPU, 0, 0, 0, 0, 0, // PC_VMINSH
BPU, 0, 0, 0, 0, 0, // PC_VMINSW
BPU, 0, 0, 0, 0, 0, // PC_VMINUB
BPU, 0, 0, 0, 0, 0, // PC_VMINUH
BPU, 0, 0, 0, 0, 0, // PC_VMINUW
BPU, 0, 0, 0, 0, 0, // PC_VMRGHB
BPU, 0, 0, 0, 0, 0, // PC_VMRGHH
BPU, 0, 0, 0, 0, 0, // PC_VMRGHW
BPU, 0, 0, 0, 0, 0, // PC_VMRGLB
BPU, 0, 0, 0, 0, 0, // PC_VMRGLH
BPU, 0, 0, 0, 0, 0, // PC_VMRGLW
BPU, 0, 0, 0, 0, 0, // PC_VMULESB
BPU, 0, 0, 0, 0, 0, // PC_VMULESH
BPU, 0, 0, 0, 0, 0, // PC_VMULEUB
BPU, 0, 0, 0, 0, 0, // PC_VMULEUH
BPU, 0, 0, 0, 0, 0, // PC_VMULOSB
BPU, 0, 0, 0, 0, 0, // PC_VMULOSH
BPU, 0, 0, 0, 0, 0, // PC_VMULOUB
BPU, 0, 0, 0, 0, 0, // PC_VMULOUH
BPU, 0, 0, 0, 0, 0, // PC_VNOR
BPU, 0, 0, 0, 0, 0, // PC_VOR
BPU, 0, 0, 0, 0, 0, // PC_VPKPX
BPU, 0, 0, 0, 0, 0, // PC_VPKSHSS
BPU, 0, 0, 0, 0, 0, // PC_VPKSHUS
BPU, 0, 0, 0, 0, 0, // PC_VPKSWSS
BPU, 0, 0, 0, 0, 0, // PC_VPKSWUS
BPU, 0, 0, 0, 0, 0, // PC_VPKUHUM
BPU, 0, 0, 0, 0, 0, // PC_VPKUHUS
BPU, 0, 0, 0, 0, 0, // PC_VPKUWUM
BPU, 0, 0, 0, 0, 0, // PC_VPKUWUS
BPU, 0, 0, 0, 0, 0, // PC_VREFP
BPU, 0, 0, 0, 0, 0, // PC_VRFIM
BPU, 0, 0, 0, 0, 0, // PC_VRFIN
BPU, 0, 0, 0, 0, 0, // PC_VRFIP
BPU, 0, 0, 0, 0, 0, // PC_VRFIZ
BPU, 0, 0, 0, 0, 0, // PC_VRLB
BPU, 0, 0, 0, 0, 0, // PC_VRLH
BPU, 0, 0, 0, 0, 0, // PC_VRLW
BPU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP
BPU, 0, 0, 0, 0, 0, // PC_VSL
BPU, 0, 0, 0, 0, 0, // PC_VSLB
BPU, 0, 0, 0, 0, 0, // PC_VSLH
BPU, 0, 0, 0, 0, 0, // PC_VSLO
BPU, 0, 0, 0, 0, 0, // PC_VSLW
BPU, 0, 0, 0, 0, 0, // PC_VSPLTB
BPU, 0, 0, 0, 0, 0, // PC_VSPLTH
BPU, 0, 0, 0, 0, 0, // PC_VSPLTW
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISB
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISH
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISW
BPU, 0, 0, 0, 0, 0, // PC_VSR
BPU, 0, 0, 0, 0, 0, // PC_VSRAB
BPU, 0, 0, 0, 0, 0, // PC_VSRAH
BPU, 0, 0, 0, 0, 0, // PC_VSRAW
BPU, 0, 0, 0, 0, 0, // PC_VSRB
BPU, 0, 0, 0, 0, 0, // PC_VSRH
BPU, 0, 0, 0, 0, 0, // PC_VSRO
BPU, 0, 0, 0, 0, 0, // PC_VSRW
BPU, 0, 0, 0, 0, 0, // PC_VSUBCUW
BPU, 0, 0, 0, 0, 0, // PC_VSUBFP
BPU, 0, 0, 0, 0, 0, // PC_VSUBSBS
BPU, 0, 0, 0, 0, 0, // PC_VSUBSHS
BPU, 0, 0, 0, 0, 0, // PC_VSUBSWS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUBM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUBS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUHM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUHS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUWM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUWS
BPU, 0, 0, 0, 0, 0, // PC_VSUMSWS
BPU, 0, 0, 0, 0, 0, // PC_VSUM2SWS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4SBS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4SHS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4UBS
BPU, 0, 0, 0, 0, 0, // PC_VUPKHPX
BPU, 0, 0, 0, 0, 0, // PC_VUPKHSB
BPU, 0, 0, 0, 0, 0, // PC_VUPKHSH
BPU, 0, 0, 0, 0, 0, // PC_VUPKLPX
BPU, 0, 0, 0, 0, 0, // PC_VUPKLSB
BPU, 0, 0, 0, 0, 0, // PC_VUPKLSH
BPU, 0, 0, 0, 0, 0, // PC_VXOR
BPU, 0, 0, 0, 0, 0, // PC_VMADDFP
BPU, 0, 0, 0, 0, 0, // PC_VMHADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VMLADDUHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMMBM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHS
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUBM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHS
BPU, 0, 0, 0, 0, 0, // PC_VNMSUBFP
BPU, 0, 0, 0, 0, 0, // PC_VPERM
BPU, 0, 0, 0, 0, 0, // PC_VSEL
BPU, 0, 0, 0, 0, 0, // PC_VSLDOI
BPU, 0, 0, 0, 0, 0, // PC_VMR
BPU, 0, 0, 0, 0, 0, // PC_VMRP
BPU, 0, 0, 0, 0, 0, // PC_SLE
BPU, 0, 0, 0, 0, 0, // PC_SLEQ
BPU, 0, 0, 0, 0, 0, // PC_SLIQ
BPU, 0, 0, 0, 0, 0, // PC_SLLIQ
BPU, 0, 0, 0, 0, 0, // PC_SLLQ
BPU, 0, 0, 0, 0, 0, // PC_SLQ
BPU, 0, 0, 0, 0, 0, // PC_SRAIQ
BPU, 0, 0, 0, 0, 0, // PC_SRAQ
BPU, 0, 0, 0, 0, 0, // PC_SRE
BPU, 0, 0, 0, 0, 0, // PC_SREA
BPU, 0, 0, 0, 0, 0, // PC_SREQ
BPU, 0, 0, 0, 0, 0, // PC_SRIQ
BPU, 0, 0, 0, 0, 0, // PC_SRLIQ
BPU, 0, 0, 0, 0, 0, // PC_SRLQ
BPU, 0, 0, 0, 0, 0, // PC_SRQ
BPU, 0, 0, 0, 0, 0, // PC_MASKG
BPU, 0, 0, 0, 0, 0, // PC_MASKIR
BPU, 0, 0, 0, 0, 0, // PC_LSCBX
BPU, 0, 0, 0, 0, 0, // PC_DIV
BPU, 0, 0, 0, 0, 0, // PC_DIVS
BPU, 0, 0, 0, 0, 0, // PC_DOZ
BPU, 0, 0, 0, 0, 0, // PC_MUL
BPU, 0, 0, 0, 0, 0, // PC_NABS
BPU, 0, 0, 0, 0, 0, // PC_ABS
BPU, 0, 0, 0, 0, 0, // PC_CLCS
BPU, 0, 0, 0, 0, 0, // PC_DOZI
BPU, 0, 0, 0, 0, 0, // PC_RLMI
BPU, 0, 0, 0, 0, 0, // PC_RRIB
};
// Transfers the instruction in pipeline stage `oldStage` to `newStage`.
// `firstStage` is the first stage of the unit; the offset of `newStage` from
// it selects which per-stage cycle count is loaded into the new stage.
// `oldStage` is emptied.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline[oldStage].instr;
    int stageIndex = newStage - firstStage;

    pipeline[newStage].remaining = instruction_timing[moving->op].cycles[stageIndex];
    pipeline[newStage].instr = moving;
    // cleared last, exactly as before, so oldStage always ends up empty
    pipeline[oldStage].instr = NULL;
}
// Claims the completion-queue entry at the free-slot cursor for `instr`,
// flags it as not completed, and steps the cursor forward with wrap-around.
static void assign_completion_buffer(PCode *instr) {
    unsigned int slot = completionbuffers.nextFreeSlot;

    completionbuffers.entries[slot].instr = instr;
    completionbuffers.entries[slot].completed = 0;
    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;

    // keep the occupancy counters in step
    completionbuffers.free--;
    completionbuffers.used++;
}
// Marks the instruction currently in pipeline stage `stage` as completed in
// the completion queue and empties that stage.
//
// The queue is searched linearly for the entry holding the instruction. If no
// entry matches, nothing is marked: the previous code wrote to
// entries[MaxEntries] in that case, which is one past the end of the array.
static void complete_instruction(int stage) {
    PCode *instr = pipeline[stage].instr;
    int buf;

    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr) {
            completionbuffers.entries[buf].completed = 1;
            break;
        }
    }

    pipeline[stage].instr = NULL;
}
// Frees the completion-queue entry at the retire cursor and moves the cursor
// on to the next entry, wrapping at the end of the array.
static void retire_instruction(void) {
    unsigned int oldest = completionbuffers.nextToRetire;

    completionbuffers.entries[oldest].instr = NULL;
    completionbuffers.nextToRetire = (oldest + 1) % MaxEntries;

    // keep the occupancy counters in step
    completionbuffers.free++;
    completionbuffers.used--;
}
// Computes the latency of `instr`: the table value for its opcode, increased
// by 2 if the record bit is set and by (argCount - 2) for LMW/STMW.
static int latency(PCode *instr) {
    int extra = 0;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        extra += 2;

    switch (instr->op) {
    case PC_LMW:
    case PC_STMW:
        // load/store multiple: cost grows with the operand count
        extra += instr->argCount - 2;
        break;
    default:
        break;
    }

    return instruction_timing[instr->op].latency + extra;
}
// Puts the simulated machine into its start state: all pipeline stages empty
// and the completion queue empty with every entry available.
static void initialize(void) {
    int n;

    n = 0;
    while (n < NumStages) {
        pipeline[n].instr = NULL;
        n++;
    }

    n = 0;
    while (n < MaxEntries) {
        completionbuffers.entries[n].instr = NULL;
        n++;
    }

    completionbuffers.nextFreeSlot = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.used = 0;
    completionbuffers.free = MaxEntries;
}
// Returns 1 when `instr` may be issued this cycle, 0 otherwise.
//
// - Nothing issues while the completion queue is full.
// - If the instruction's initial stage is free, it issues unless it is a
//   write and the instruction in LSU2 is also a write.
// - If the initial stage is busy, only a fixed set of add/compare opcodes
//   whose initial stage is IU may still issue, and only when they do not
//   depend (GPR class) on the instruction in IU and the SRU stage is empty.
static int can_issue(PCode *instr) {
    int unit;
    PCode *inLSU2;

    if (!completionbuffers.free)
        return 0;

    unit = instruction_timing[instr->op].stage;

    if (pipeline[unit].instr == NULL) {
        inLSU2 = pipeline[LSU2].instr;
        if ((instr->flags & fIsWrite) && inLSU2 && (inLSU2->flags & fIsWrite))
            return 0;
        return 1;
    }

    // the home unit is occupied from here on
    if (unit != IU)
        return 0;

    switch (instr->op) {
    case PC_ADD:
    case PC_ADDC:
    case PC_ADDI:
    case PC_ADDIS:
    case PC_CMPI:
    case PC_CMP:
    case PC_CMPLI:
    case PC_CMPL:
        if (is_dependent(instr, pipeline[IU].instr, RegClass_GPR))
            return 0;
        return pipeline[SRU].instr ? 0 : 1;
    default:
        return 0;
    }
}
// Issues `instr`: reserves a completion-queue entry and places the
// instruction in a pipeline stage with its first-stage cycle count.
// An instruction whose initial stage is IU is placed in SRU instead when IU
// is already occupied.
static void issue(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;
    int firstCycles = instruction_timing[instr->op].cycles[0];

    if (unit == IU) {
        if (pipeline[IU].instr != NULL)
            unit = SRU;
    }

    assign_completion_buffer(instr);

    pipeline[unit].remaining = firstCycles;
    pipeline[unit].instr = instr;
}
// Advances the simulated machine by one clock cycle.
// The steps below run in a fixed order and the order matters:
//   1. count down every occupied stage that still has cycles remaining
//   2. retire up to two completed instructions from the head of the queue
//   3. mark instructions that have finished their final stage as completed
//   4. move finished instructions into the next stage of their unit
static void advance_clock(void) {
int stage;
// 1. tick down each busy stage
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
// 2. retirement is in order and limited to two entries per call; it happens
// before step 3, so an instruction completed in this call is not retired
// until a later call
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
}
}
// 3. stages from which an instruction completes directly
if (pipeline[IU].instr && pipeline[IU].remaining == 0)
complete_instruction(IU);
if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0)
complete_instruction(LSU2);
if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0)
complete_instruction(FPU3);
if (pipeline[SRU].instr && pipeline[SRU].remaining == 0)
complete_instruction(SRU);
if (pipeline[BPU].instr && pipeline[BPU].remaining == 0)
complete_instruction(BPU);
// FDIV/FDIVS complete straight from FPU1 instead of moving on to FPU2/FPU3
if (
pipeline[FPU1].instr &&
pipeline[FPU1].remaining == 0 &&
(pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS)
)
complete_instruction(FPU1);
// 4. later stages are handled first, so a stage vacated here can be refilled
// from the preceding stage within the same call
if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr)
advance(FPU1, FPU2, FPU3);
if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr)
advance(FPU1, FPU1, FPU2);
if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr)
advance(LSU1, LSU1, LSU2);
}
// Returns the `serializes` flag from the timing table for the opcode of
// `instr` (non-zero when the instruction is marked as serializing).
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
// Machine description exported by this file: three numeric parameters
// followed by the scheduler callbacks defined above.
// NOTE(review): the meaning of the three leading integers is given by the
// MachineInfo declaration (presumably in compiler/Scheduler.h) - confirm there.
MachineInfo machine603e = {
    2,
    1,
    0,
    latency,
    initialize,
    can_issue,
    issue,
    advance_clock,
    serializes,
    default_uses_vpermute_unit
};

View File

@@ -0,0 +1,670 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://archive.org/details/bitsavers_motorolaPosManualNov94_22719504
// Pipeline stages modelled for this CPU. The values index the `pipeline`
// array; NumStages is the number of stages and must stay last.
typedef enum Stage {
SCIU, // Single-Cycle Integer Unit 1
SCIU2, // Single-Cycle Integer Unit 2
MCIU, // Multiple-Cycle Integer Unit
FPU1, // Floating Point Unit
FPU2,
FPU3,
LSU1, // Load/Store Unit
LSU2,
BPU, // Branch Processing Unit
NumStages
} Stage;
// Current occupancy of every pipeline stage; the array has one element per
// Stage value.
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Capacity of the completion queue (size of completionbuffers.entries).
enum {
MaxEntries = 16
};
// Completion queue state: occupancy counters, the retire and free-slot
// cursors, and the circular array of MaxEntries entries they index.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the instruction occupying this entry
PCode *instr;
// NOTE(review): presumably non-zero once the instruction has finished executing - confirm
int completed;
} entries[MaxEntries];
} completionbuffers;
// NOTE(review): presumably the instructions most recently completed by the
// two single-cycle integer units (SCIU and SCIU2); their uses are not
// visible in this part of the file - confirm against the code below.
static PCode *sciu_completed_instruction;
static PCode *sciu2_completed_instruction;
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[3];
// does this instruction serialise?
char serializes;
} instruction_timing[OPCODE_MAX] = {
BPU, 0, 0, 0, 0, 1, // PC_B
BPU, 0, 0, 0, 0, 1, // PC_BL
BPU, 0, 0, 0, 0, 1, // PC_BC
BPU, 0, 0, 0, 0, 1, // PC_BCLR
BPU, 0, 0, 0, 0, 1, // PC_BCCTR
BPU, 0, 0, 0, 0, 1, // PC_BT
BPU, 0, 0, 0, 0, 1, // PC_BTLR
BPU, 0, 0, 0, 0, 1, // PC_BTCTR
BPU, 0, 0, 0, 0, 1, // PC_BF
BPU, 0, 0, 0, 0, 1, // PC_BFLR
BPU, 0, 0, 0, 0, 1, // PC_BFCTR
BPU, 0, 0, 0, 0, 1, // PC_BDNZ
BPU, 0, 0, 0, 0, 1, // PC_BDNZT
BPU, 0, 0, 0, 0, 1, // PC_BDNZF
BPU, 0, 0, 0, 0, 1, // PC_BDZ
BPU, 0, 0, 0, 0, 1, // PC_BDZT
BPU, 0, 0, 0, 0, 1, // PC_BDZF
BPU, 0, 0, 0, 0, 1, // PC_BLR
BPU, 0, 0, 0, 0, 1, // PC_BCTR
BPU, 0, 0, 0, 0, 1, // PC_BCTRL
BPU, 0, 0, 0, 0, 1, // PC_BLRL
LSU1, 2, 1, 1, 0, 0, // PC_LBZ
LSU1, 2, 1, 1, 0, 0, // PC_LBZU
LSU1, 2, 1, 1, 0, 0, // PC_LBZX
LSU1, 2, 1, 1, 0, 0, // PC_LBZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHZ
LSU1, 2, 1, 1, 0, 0, // PC_LHZU
LSU1, 2, 1, 1, 0, 0, // PC_LHZX
LSU1, 2, 1, 1, 0, 0, // PC_LHZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHA
LSU1, 2, 1, 1, 0, 0, // PC_LHAU
LSU1, 2, 1, 1, 0, 0, // PC_LHAX
LSU1, 2, 1, 1, 0, 0, // PC_LHAUX
LSU1, 2, 1, 1, 0, 0, // PC_LHBRX
LSU1, 2, 1, 1, 0, 0, // PC_LWZ
LSU1, 2, 1, 1, 0, 0, // PC_LWZU
LSU1, 2, 1, 1, 0, 0, // PC_LWZX
LSU1, 2, 1, 1, 0, 0, // PC_LWZUX
LSU1, 2, 1, 1, 0, 0, // PC_LWBRX
LSU1, 2, 1, 1, 0, 0, // PC_LMW
LSU1, 3, 1, 1, 0, 0, // PC_STB
LSU1, 3, 1, 1, 0, 0, // PC_STBU
LSU1, 3, 1, 1, 0, 0, // PC_STBX
LSU1, 3, 1, 1, 0, 0, // PC_STBUX
LSU1, 3, 1, 1, 0, 0, // PC_STH
LSU1, 3, 1, 1, 0, 0, // PC_STHU
LSU1, 3, 1, 1, 0, 0, // PC_STHX
LSU1, 3, 1, 1, 0, 0, // PC_STHUX
LSU1, 3, 1, 1, 0, 0, // PC_STHBRX
LSU1, 3, 1, 1, 0, 0, // PC_STW
LSU1, 3, 1, 1, 0, 0, // PC_STWU
LSU1, 3, 1, 1, 0, 0, // PC_STWX
LSU1, 3, 1, 1, 0, 0, // PC_STWUX
LSU1, 3, 1, 1, 0, 0, // PC_STWBRX
LSU1, 2, 1, 1, 0, 0, // PC_STMW
LSU1, 2, 1, 1, 0, 0, // PC_DCBF
LSU1, 2, 1, 1, 0, 0, // PC_DCBST
LSU1, 2, 1, 1, 0, 0, // PC_DCBT
LSU1, 2, 1, 1, 0, 0, // PC_DCBTST
LSU1, 2, 1, 1, 0, 0, // PC_DCBZ
SCIU, 1, 1, 0, 0, 0, // PC_ADD
SCIU, 1, 1, 0, 0, 0, // PC_ADDC
SCIU, 1, 1, 0, 0, 0, // PC_ADDE
SCIU, 1, 1, 0, 0, 0, // PC_ADDI
SCIU, 1, 1, 0, 0, 0, // PC_ADDIC
SCIU, 1, 1, 0, 0, 0, // PC_ADDICR
SCIU, 1, 1, 0, 0, 0, // PC_ADDIS
SCIU, 1, 1, 0, 0, 0, // PC_ADDME
SCIU, 1, 1, 0, 0, 0, // PC_ADDZE
MCIU, 20, 20, 0, 0, 0, // PC_DIVW
MCIU, 20, 20, 0, 0, 0, // PC_DIVWU
MCIU, 4, 4, 0, 0, 0, // PC_MULHW
MCIU, 4, 4, 0, 0, 0, // PC_MULHWU
MCIU, 3, 3, 0, 0, 0, // PC_MULLI
MCIU, 4, 4, 0, 0, 0, // PC_MULLW
SCIU, 1, 1, 0, 0, 0, // PC_NEG
SCIU, 1, 1, 0, 0, 0, // PC_SUBF
SCIU, 1, 1, 0, 0, 0, // PC_SUBFC
SCIU, 1, 1, 0, 0, 0, // PC_SUBFE
SCIU, 1, 1, 0, 0, 0, // PC_SUBFIC
SCIU, 1, 1, 0, 0, 0, // PC_SUBFME
SCIU, 1, 1, 0, 0, 0, // PC_SUBFZE
SCIU, 3, 1, 0, 0, 0, // PC_CMPI
SCIU, 3, 1, 0, 0, 0, // PC_CMP
SCIU, 3, 1, 0, 0, 0, // PC_CMPLI
SCIU, 3, 1, 0, 0, 0, // PC_CMPL
SCIU, 1, 1, 0, 0, 0, // PC_ANDI
SCIU, 1, 1, 0, 0, 0, // PC_ANDIS
SCIU, 1, 1, 0, 0, 0, // PC_ORI
SCIU, 1, 1, 0, 0, 0, // PC_ORIS
SCIU, 1, 1, 0, 0, 0, // PC_XORI
SCIU, 1, 1, 0, 0, 0, // PC_XORIS
SCIU, 1, 1, 0, 0, 0, // PC_AND
SCIU, 1, 1, 0, 0, 0, // PC_OR
SCIU, 1, 1, 0, 0, 0, // PC_XOR
SCIU, 1, 1, 0, 0, 0, // PC_NAND
SCIU, 1, 1, 0, 0, 0, // PC_NOR
SCIU, 1, 1, 0, 0, 0, // PC_EQV
SCIU, 1, 1, 0, 0, 0, // PC_ANDC
SCIU, 1, 1, 0, 0, 0, // PC_ORC
SCIU, 1, 1, 0, 0, 0, // PC_EXTSB
SCIU, 1, 1, 0, 0, 0, // PC_EXTSH
SCIU, 1, 1, 0, 0, 0, // PC_CNTLZW
SCIU, 1, 1, 0, 0, 0, // PC_RLWINM
SCIU, 1, 1, 0, 0, 0, // PC_RLWNM
SCIU, 1, 1, 0, 0, 0, // PC_RLWIMI
SCIU, 1, 1, 0, 0, 0, // PC_SLW
SCIU, 1, 1, 0, 0, 0, // PC_SRW
SCIU, 1, 1, 0, 0, 0, // PC_SRAWI
SCIU, 1, 1, 0, 0, 0, // PC_SRAW
BPU, 1, 1, 0, 0, 0, // PC_CRAND
BPU, 1, 1, 0, 0, 0, // PC_CRANDC
BPU, 1, 1, 0, 0, 0, // PC_CREQV
BPU, 1, 1, 0, 0, 0, // PC_CRNAND
BPU, 1, 1, 0, 0, 0, // PC_CRNOR
BPU, 1, 1, 0, 0, 0, // PC_CROR
BPU, 1, 1, 0, 0, 0, // PC_CRORC
BPU, 1, 1, 0, 0, 0, // PC_CRXOR
BPU, 1, 1, 0, 0, 0, // PC_MCRF
MCIU, 1, 1, 0, 0, 0, // PC_MTXER
MCIU, 1, 1, 0, 0, 0, // PC_MTCTR
MCIU, 1, 1, 0, 0, 0, // PC_MTLR
MCIU, 1, 1, 0, 0, 0, // PC_MTCRF
MCIU, 1, 1, 0, 0, 0, // PC_MTMSR
MCIU, 1, 1, 0, 0, 0, // PC_MTSPR
MCIU, 1, 1, 0, 0, 0, // PC_MFMSR
MCIU, 1, 1, 0, 0, 0, // PC_MFSPR
MCIU, 3, 3, 0, 0, 0, // PC_MFXER
MCIU, 3, 3, 0, 0, 0, // PC_MFCTR
MCIU, 3, 3, 0, 0, 0, // PC_MFLR
MCIU, 3, 3, 0, 0, 0, // PC_MFCR
FPU1, 3, 1, 1, 1, 0, // PC_MFFS
FPU1, 3, 1, 1, 1, 0, // PC_MTFSF
LSU1, 1, 0, 0, 0, 1, // PC_EIEIO
LSU1, 1, 0, 0, 0, 1, // PC_ISYNC
LSU1, 1, 0, 0, 0, 1, // PC_SYNC
LSU1, 1, 1, 0, 0, 1, // PC_RFI
SCIU, 1, 1, 0, 0, 0, // PC_LI
SCIU, 1, 1, 0, 0, 0, // PC_LIS
SCIU, 1, 1, 0, 0, 0, // PC_MR
SCIU, 1, 1, 0, 0, 0, // PC_NOP
SCIU, 1, 1, 0, 0, 0, // PC_NOT
LSU1, 3, 1, 1, 0, 0, // PC_LFS
LSU1, 3, 1, 1, 0, 0, // PC_LFSU
LSU1, 3, 1, 1, 0, 0, // PC_LFSX
LSU1, 3, 1, 1, 0, 0, // PC_LFSUX
LSU1, 3, 1, 1, 0, 0, // PC_LFD
LSU1, 3, 1, 1, 0, 0, // PC_LFDU
LSU1, 3, 1, 1, 0, 0, // PC_LFDX
LSU1, 3, 1, 1, 0, 0, // PC_LFDUX
LSU1, 3, 1, 1, 0, 0, // PC_STFS
LSU1, 3, 1, 1, 0, 0, // PC_STFSU
LSU1, 3, 1, 1, 0, 0, // PC_STFSX
LSU1, 3, 1, 1, 0, 0, // PC_STFSUX
LSU1, 3, 1, 1, 0, 0, // PC_STFD
LSU1, 3, 1, 1, 0, 0, // PC_STFDU
LSU1, 3, 1, 1, 0, 0, // PC_STFDX
LSU1, 3, 1, 1, 0, 0, // PC_STFDUX
FPU1, 3, 1, 1, 1, 0, // PC_FMR
FPU1, 3, 1, 1, 1, 0, // PC_FABS
FPU1, 3, 1, 1, 1, 0, // PC_FNEG
FPU1, 3, 1, 1, 1, 0, // PC_FNABS
FPU1, 3, 1, 1, 1, 0, // PC_FADD
FPU1, 3, 1, 1, 1, 0, // PC_FADDS
FPU1, 3, 1, 1, 1, 0, // PC_FSUB
FPU1, 3, 1, 1, 1, 0, // PC_FSUBS
FPU1, 3, 1, 1, 1, 0, // PC_FMUL
FPU1, 3, 1, 1, 1, 0, // PC_FMULS
FPU1, 32, 32, 0, 0, 0, // PC_FDIV
FPU1, 18, 18, 0, 0, 0, // PC_FDIVS
FPU1, 3, 1, 1, 1, 0, // PC_FMADD
FPU1, 3, 1, 1, 1, 0, // PC_FMADDS
FPU1, 3, 1, 1, 1, 0, // PC_FMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS
FPU1, 3, 1, 1, 1, 0, // PC_FNMADD
FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS
FPU1, 3, 1, 1, 1, 0, // PC_FNMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS
FPU1, 18, 18, 0, 0, 0, // PC_FRES
FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE
FPU1, 3, 1, 1, 1, 0, // PC_FSEL
FPU1, 3, 1, 1, 1, 0, // PC_FRSP
FPU1, 3, 1, 1, 1, 0, // PC_FCTIW
FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ
FPU1, 5, 1, 1, 1, 0, // PC_FCMPU
FPU1, 5, 1, 1, 1, 0, // PC_FCMPO
LSU1, 1, 1, 0, 0, 0, // PC_LWARX
LSU1, 1, 1, 0, 0, 0, // PC_LSWI
LSU1, 1, 1, 0, 0, 0, // PC_LSWX
LSU1, 1, 1, 0, 0, 0, // PC_STFIWX
LSU1, 1, 1, 0, 0, 0, // PC_STSWI
LSU1, 1, 1, 0, 0, 0, // PC_STSWX
LSU1, 1, 1, 0, 0, 0, // PC_STWCX
MCIU, 1, 1, 0, 0, 1, // PC_ECIWX
MCIU, 1, 1, 0, 0, 1, // PC_ECOWX
MCIU, 1, 1, 0, 0, 0, // PC_DCBI
MCIU, 1, 1, 0, 0, 0, // PC_ICBI
MCIU, 1, 1, 0, 0, 0, // PC_MCRFS
MCIU, 1, 1, 0, 0, 0, // PC_MCRXR
MCIU, 1, 1, 0, 0, 0, // PC_MFTB
MCIU, 1, 1, 0, 0, 0, // PC_MFSR
MCIU, 1, 1, 0, 0, 0, // PC_MTSR
MCIU, 1, 1, 0, 0, 0, // PC_MFSRIN
MCIU, 1, 1, 0, 0, 0, // PC_MTSRIN
MCIU, 1, 1, 0, 0, 0, // PC_MTFSB0
MCIU, 1, 1, 0, 0, 0, // PC_MTFSB1
MCIU, 1, 1, 0, 0, 0, // PC_MTFSFI
MCIU, 1, 1, 0, 0, 1, // PC_SC
FPU1, 1, 1, 0, 0, 0, // PC_FSQRT
FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS
MCIU, 1, 1, 0, 0, 0, // PC_TLBIA
MCIU, 1, 1, 0, 0, 0, // PC_TLBIE
MCIU, 1, 1, 0, 0, 0, // PC_TLBLD
MCIU, 1, 1, 0, 0, 0, // PC_TLBLI
MCIU, 1, 1, 0, 0, 0, // PC_TLBSYNC
MCIU, 1, 1, 0, 0, 1, // PC_TW
MCIU, 1, 1, 0, 0, 1, // PC_TRAP
MCIU, 1, 1, 0, 0, 1, // PC_TWI
MCIU, 1, 1, 0, 0, 1, // PC_OPWORD
MCIU, 1, 1, 0, 0, 0, // PC_MFROM
MCIU, 1, 1, 0, 0, 1, // PC_DSA
MCIU, 1, 1, 0, 0, 1, // PC_ESA
MCIU, 0, 0, 0, 0, 0, // PC_DCCCI
MCIU, 0, 0, 0, 0, 0, // PC_DCREAD
MCIU, 0, 0, 0, 0, 0, // PC_ICBT
MCIU, 0, 0, 0, 0, 0, // PC_ICCCI
MCIU, 0, 0, 0, 0, 0, // PC_ICREAD
MCIU, 0, 0, 0, 0, 0, // PC_RFCI
MCIU, 0, 0, 0, 0, 0, // PC_TLBRE
MCIU, 0, 0, 0, 0, 0, // PC_TLBSX
MCIU, 0, 0, 0, 0, 0, // PC_TLBWE
MCIU, 0, 0, 0, 0, 0, // PC_WRTEE
MCIU, 0, 0, 0, 0, 0, // PC_WRTEEI
MCIU, 0, 0, 0, 0, 0, // PC_MFDCR
MCIU, 0, 0, 0, 0, 0, // PC_MTDCR
MCIU, 0, 0, 0, 0, 0, // PC_DCBA
SCIU, 0, 0, 0, 0, 0, // PC_DSS
SCIU, 0, 0, 0, 0, 0, // PC_DSSALL
SCIU, 0, 0, 0, 0, 0, // PC_DST
SCIU, 0, 0, 0, 0, 0, // PC_DSTT
SCIU, 0, 0, 0, 0, 0, // PC_DSTST
SCIU, 0, 0, 0, 0, 0, // PC_DSTSTT
SCIU, 0, 0, 0, 0, 0, // PC_LVEBX
SCIU, 0, 0, 0, 0, 0, // PC_LVEHX
SCIU, 0, 0, 0, 0, 0, // PC_LVEWX
SCIU, 0, 0, 0, 0, 0, // PC_LVSL
SCIU, 0, 0, 0, 0, 0, // PC_LVSR
SCIU, 0, 0, 0, 0, 0, // PC_LVX
SCIU, 0, 0, 0, 0, 0, // PC_LVXL
SCIU, 0, 0, 0, 0, 0, // PC_STVEBX
SCIU, 0, 0, 0, 0, 0, // PC_STVEHX
SCIU, 0, 0, 0, 0, 0, // PC_STVEWX
SCIU, 0, 0, 0, 0, 0, // PC_STVX
SCIU, 0, 0, 0, 0, 0, // PC_STVXL
SCIU, 0, 0, 0, 0, 0, // PC_MFVSCR
SCIU, 0, 0, 0, 0, 0, // PC_MTVSCR
SCIU, 0, 0, 0, 0, 0, // PC_VADDCUW
SCIU, 0, 0, 0, 0, 0, // PC_VADDFP
SCIU, 0, 0, 0, 0, 0, // PC_VADDSBS
SCIU, 0, 0, 0, 0, 0, // PC_VADDSHS
SCIU, 0, 0, 0, 0, 0, // PC_VADDSWS
SCIU, 0, 0, 0, 0, 0, // PC_VADDUBM
SCIU, 0, 0, 0, 0, 0, // PC_VADDUBS
SCIU, 0, 0, 0, 0, 0, // PC_VADDUHM
SCIU, 0, 0, 0, 0, 0, // PC_VADDUHS
SCIU, 0, 0, 0, 0, 0, // PC_VADDUWM
SCIU, 0, 0, 0, 0, 0, // PC_VADDUWS
SCIU, 0, 0, 0, 0, 0, // PC_VAND
SCIU, 0, 0, 0, 0, 0, // PC_VANDC
SCIU, 0, 0, 0, 0, 0, // PC_VAVGSB
SCIU, 0, 0, 0, 0, 0, // PC_VAVGSH
SCIU, 0, 0, 0, 0, 0, // PC_VAVGSW
SCIU, 0, 0, 0, 0, 0, // PC_VAVGUB
SCIU, 0, 0, 0, 0, 0, // PC_VAVGUH
SCIU, 0, 0, 0, 0, 0, // PC_VAVGUW
SCIU, 0, 0, 0, 0, 0, // PC_VCFSX
SCIU, 0, 0, 0, 0, 0, // PC_VCFUX
SCIU, 0, 0, 0, 0, 0, // PC_VCMPBFP
SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQFP
SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQUB
SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQUH
SCIU, 0, 0, 0, 0, 0, // PC_VCMPEQUW
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGEFP
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTFP
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTSB
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTSH
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTSW
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTUB
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTUH
SCIU, 0, 0, 0, 0, 0, // PC_VCMPGTUW
SCIU, 0, 0, 0, 0, 0, // PC_VCTSXS
SCIU, 0, 0, 0, 0, 0, // PC_VCTUXS
SCIU, 0, 0, 0, 0, 0, // PC_VEXPTEFP
SCIU, 0, 0, 0, 0, 0, // PC_VLOGEFP
SCIU, 0, 0, 0, 0, 0, // PC_VMAXFP
SCIU, 0, 0, 0, 0, 0, // PC_VMAXSB
SCIU, 0, 0, 0, 0, 0, // PC_VMAXSH
SCIU, 0, 0, 0, 0, 0, // PC_VMAXSW
SCIU, 0, 0, 0, 0, 0, // PC_VMAXUB
SCIU, 0, 0, 0, 0, 0, // PC_VMAXUH
SCIU, 0, 0, 0, 0, 0, // PC_VMAXUW
SCIU, 0, 0, 0, 0, 0, // PC_VMINFP
SCIU, 0, 0, 0, 0, 0, // PC_VMINSB
SCIU, 0, 0, 0, 0, 0, // PC_VMINSH
SCIU, 0, 0, 0, 0, 0, // PC_VMINSW
SCIU, 0, 0, 0, 0, 0, // PC_VMINUB
SCIU, 0, 0, 0, 0, 0, // PC_VMINUH
SCIU, 0, 0, 0, 0, 0, // PC_VMINUW
SCIU, 0, 0, 0, 0, 0, // PC_VMRGHB
SCIU, 0, 0, 0, 0, 0, // PC_VMRGHH
SCIU, 0, 0, 0, 0, 0, // PC_VMRGHW
SCIU, 0, 0, 0, 0, 0, // PC_VMRGLB
SCIU, 0, 0, 0, 0, 0, // PC_VMRGLH
SCIU, 0, 0, 0, 0, 0, // PC_VMRGLW
SCIU, 0, 0, 0, 0, 0, // PC_VMULESB
SCIU, 0, 0, 0, 0, 0, // PC_VMULESH
SCIU, 0, 0, 0, 0, 0, // PC_VMULEUB
SCIU, 0, 0, 0, 0, 0, // PC_VMULEUH
SCIU, 0, 0, 0, 0, 0, // PC_VMULOSB
SCIU, 0, 0, 0, 0, 0, // PC_VMULOSH
SCIU, 0, 0, 0, 0, 0, // PC_VMULOUB
SCIU, 0, 0, 0, 0, 0, // PC_VMULOUH
SCIU, 0, 0, 0, 0, 0, // PC_VNOR
SCIU, 0, 0, 0, 0, 0, // PC_VOR
SCIU, 0, 0, 0, 0, 0, // PC_VPKPX
SCIU, 0, 0, 0, 0, 0, // PC_VPKSHSS
SCIU, 0, 0, 0, 0, 0, // PC_VPKSHUS
SCIU, 0, 0, 0, 0, 0, // PC_VPKSWSS
SCIU, 0, 0, 0, 0, 0, // PC_VPKSWUS
SCIU, 0, 0, 0, 0, 0, // PC_VPKUHUM
SCIU, 0, 0, 0, 0, 0, // PC_VPKUHUS
SCIU, 0, 0, 0, 0, 0, // PC_VPKUWUM
SCIU, 0, 0, 0, 0, 0, // PC_VPKUWUS
SCIU, 0, 0, 0, 0, 0, // PC_VREFP
SCIU, 0, 0, 0, 0, 0, // PC_VRFIM
SCIU, 0, 0, 0, 0, 0, // PC_VRFIN
SCIU, 0, 0, 0, 0, 0, // PC_VRFIP
SCIU, 0, 0, 0, 0, 0, // PC_VRFIZ
SCIU, 0, 0, 0, 0, 0, // PC_VRLB
SCIU, 0, 0, 0, 0, 0, // PC_VRLH
SCIU, 0, 0, 0, 0, 0, // PC_VRLW
SCIU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP
SCIU, 0, 0, 0, 0, 0, // PC_VSL
SCIU, 0, 0, 0, 0, 0, // PC_VSLB
SCIU, 0, 0, 0, 0, 0, // PC_VSLH
SCIU, 0, 0, 0, 0, 0, // PC_VSLO
SCIU, 0, 0, 0, 0, 0, // PC_VSLW
SCIU, 0, 0, 0, 0, 0, // PC_VSPLTB
SCIU, 0, 0, 0, 0, 0, // PC_VSPLTH
SCIU, 0, 0, 0, 0, 0, // PC_VSPLTW
SCIU, 0, 0, 0, 0, 0, // PC_VSPLTISB
SCIU, 0, 0, 0, 0, 0, // PC_VSPLTISH
SCIU, 0, 0, 0, 0, 0, // PC_VSPLTISW
SCIU, 0, 0, 0, 0, 0, // PC_VSR
SCIU, 0, 0, 0, 0, 0, // PC_VSRAB
SCIU, 0, 0, 0, 0, 0, // PC_VSRAH
SCIU, 0, 0, 0, 0, 0, // PC_VSRAW
SCIU, 0, 0, 0, 0, 0, // PC_VSRB
SCIU, 0, 0, 0, 0, 0, // PC_VSRH
SCIU, 0, 0, 0, 0, 0, // PC_VSRO
SCIU, 0, 0, 0, 0, 0, // PC_VSRW
SCIU, 0, 0, 0, 0, 0, // PC_VSUBCUW
SCIU, 0, 0, 0, 0, 0, // PC_VSUBFP
SCIU, 0, 0, 0, 0, 0, // PC_VSUBSBS
SCIU, 0, 0, 0, 0, 0, // PC_VSUBSHS
SCIU, 0, 0, 0, 0, 0, // PC_VSUBSWS
SCIU, 0, 0, 0, 0, 0, // PC_VSUBUBM
SCIU, 0, 0, 0, 0, 0, // PC_VSUBUBS
SCIU, 0, 0, 0, 0, 0, // PC_VSUBUHM
SCIU, 0, 0, 0, 0, 0, // PC_VSUBUHS
SCIU, 0, 0, 0, 0, 0, // PC_VSUBUWM
SCIU, 0, 0, 0, 0, 0, // PC_VSUBUWS
SCIU, 0, 0, 0, 0, 0, // PC_VSUMSWS
SCIU, 0, 0, 0, 0, 0, // PC_VSUM2SWS
SCIU, 0, 0, 0, 0, 0, // PC_VSUM4SBS
SCIU, 0, 0, 0, 0, 0, // PC_VSUM4SHS
SCIU, 0, 0, 0, 0, 0, // PC_VSUM4UBS
SCIU, 0, 0, 0, 0, 0, // PC_VUPKHPX
SCIU, 0, 0, 0, 0, 0, // PC_VUPKHSB
SCIU, 0, 0, 0, 0, 0, // PC_VUPKHSH
SCIU, 0, 0, 0, 0, 0, // PC_VUPKLPX
SCIU, 0, 0, 0, 0, 0, // PC_VUPKLSB
SCIU, 0, 0, 0, 0, 0, // PC_VUPKLSH
SCIU, 0, 0, 0, 0, 0, // PC_VXOR
SCIU, 0, 0, 0, 0, 0, // PC_VMADDFP
SCIU, 0, 0, 0, 0, 0, // PC_VMHADDSHS
SCIU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS
SCIU, 0, 0, 0, 0, 0, // PC_VMLADDUHM
SCIU, 0, 0, 0, 0, 0, // PC_VMSUMMBM
SCIU, 0, 0, 0, 0, 0, // PC_VMSUMSHM
SCIU, 0, 0, 0, 0, 0, // PC_VMSUMSHS
SCIU, 0, 0, 0, 0, 0, // PC_VMSUMUBM
SCIU, 0, 0, 0, 0, 0, // PC_VMSUMUHM
SCIU, 0, 0, 0, 0, 0, // PC_VMSUMUHS
SCIU, 0, 0, 0, 0, 0, // PC_VNMSUBFP
SCIU, 0, 0, 0, 0, 0, // PC_VPERM
SCIU, 0, 0, 0, 0, 0, // PC_VSEL
SCIU, 0, 0, 0, 0, 0, // PC_VSLDOI
SCIU, 0, 0, 0, 0, 0, // PC_VMR
SCIU, 0, 0, 0, 0, 0, // PC_VMRP
SCIU, 0, 0, 0, 0, 0, // PC_SLE
SCIU, 0, 0, 0, 0, 0, // PC_SLEQ
SCIU, 0, 0, 0, 0, 0, // PC_SLIQ
SCIU, 0, 0, 0, 0, 0, // PC_SLLIQ
SCIU, 0, 0, 0, 0, 0, // PC_SLLQ
SCIU, 0, 0, 0, 0, 0, // PC_SLQ
SCIU, 0, 0, 0, 0, 0, // PC_SRAIQ
SCIU, 0, 0, 0, 0, 0, // PC_SRAQ
SCIU, 0, 0, 0, 0, 0, // PC_SRE
SCIU, 0, 0, 0, 0, 0, // PC_SREA
SCIU, 0, 0, 0, 0, 0, // PC_SREQ
SCIU, 0, 0, 0, 0, 0, // PC_SRIQ
SCIU, 0, 0, 0, 0, 0, // PC_SRLIQ
SCIU, 0, 0, 0, 0, 0, // PC_SRLQ
SCIU, 0, 0, 0, 0, 0, // PC_SRQ
SCIU, 0, 0, 0, 0, 0, // PC_MASKG
SCIU, 0, 0, 0, 0, 0, // PC_MASKIR
SCIU, 0, 0, 0, 0, 0, // PC_LSCBX
SCIU, 0, 0, 0, 0, 0, // PC_DIV
SCIU, 0, 0, 0, 0, 0, // PC_DIVS
SCIU, 0, 0, 0, 0, 0, // PC_DOZ
SCIU, 0, 0, 0, 0, 0, // PC_MUL
SCIU, 0, 0, 0, 0, 0, // PC_NABS
SCIU, 0, 0, 0, 0, 0, // PC_ABS
SCIU, 0, 0, 0, 0, 0, // PC_CLCS
SCIU, 0, 0, 0, 0, 0, // PC_DOZI
SCIU, 0, 0, 0, 0, 0, // PC_RLMI
SCIU, 0, 0, 0, 0, 0, // PC_RRIB
};
// Moves the instruction held in pipeline[oldStage] into pipeline[newStage].
// firstStage is the first stage of the unit being advanced; the distance of
// newStage from it selects which per-stage cycle count becomes the new
// countdown. The old stage is left empty.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline[oldStage].instr;
    int stageIndex = newStage - firstStage;

    pipeline[newStage].instr = moving;
    pipeline[newStage].remaining = instruction_timing[moving->op].cycles[stageIndex];

    // clear the source stage last, exactly as before
    pipeline[oldStage].instr = NULL;
}
// Records a newly issued instruction in the slot at nextFreeSlot of the
// circular completion buffer, marks it as not yet completed, and updates the
// used/free counters and the next-free index (wrapping at MaxEntries).
static void assign_completion_buffer(PCode *instr) {
    completionbuffers.used += 1;
    completionbuffers.free -= 1;

    completionbuffers.entries[completionbuffers.nextFreeSlot].completed = 0;
    completionbuffers.entries[completionbuffers.nextFreeSlot].instr = instr;

    completionbuffers.nextFreeSlot = (completionbuffers.nextFreeSlot + 1) % MaxEntries;
}
// Finishes the instruction currently occupying pipeline[stage]:
//  - flags its completion buffer entry as completed so it can be retired,
//  - empties the pipeline stage,
//  - remembers it as the instruction most recently completed by SCIU/SCIU2
//    when it came from one of those stages.
static void complete_instruction(int stage) {
    PCode *instr = pipeline[stage].instr;
    int buf;

    // find the completion buffer entry that tracks this instruction
    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr)
            break;
    }

    // Only touch the entry when the search succeeded. Previously a failed
    // search left buf == MaxEntries and the store below wrote one element
    // past the end of the entries array.
    if (buf < MaxEntries)
        completionbuffers.entries[buf].completed = 1;

    pipeline[stage].instr = NULL;

    if (stage == SCIU)
        sciu_completed_instruction = instr;
    else if (stage == SCIU2)
        sciu2_completed_instruction = instr;
}
// Releases the completion buffer entry at nextToRetire: clears its
// instruction pointer, updates the used/free counters, and steps the retire
// index forward (wrapping at MaxEntries).
static void retire_instruction(void) {
    completionbuffers.entries[completionbuffers.nextToRetire].instr = NULL;

    completionbuffers.free += 1;
    completionbuffers.used -= 1;

    completionbuffers.nextToRetire = (completionbuffers.nextToRetire + 1) % MaxEntries;
}
// Returns the latency in cycles of an instruction: the table latency for its
// opcode, plus 2 when the record bit flag is set, plus (argCount - 2) for the
// load/store-multiple opcodes PC_LMW and PC_STMW.
static int latency(PCode *instr) {
    int total = instruction_timing[instr->op].latency;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        total += 2;

    switch (instr->op) {
    case PC_LMW:
    case PC_STMW:
        total += instr->argCount - 2;
        break;
    default:
        break;
    }

    return total;
}
// Resets the pipeline model: every stage is emptied, the completion buffer
// queue is marked entirely free with both indices at zero, and the
// "completed this cycle" SCIU trackers are cleared.
static void initialize(void) {
    int idx;

    idx = 0;
    while (idx < NumStages) {
        pipeline[idx].instr = NULL;
        idx++;
    }

    completionbuffers.free = MaxEntries;
    completionbuffers.used = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.nextFreeSlot = 0;

    idx = 0;
    while (idx < MaxEntries) {
        completionbuffers.entries[idx].instr = NULL;
        idx++;
    }

    sciu_completed_instruction = NULL;
    sciu2_completed_instruction = NULL;
}
// Decides whether an instruction can be issued this cycle.
// Returns 0 when no completion buffer is free or its unit is occupied.
// Instructions whose table stage is SCIU may use either SCIU or SCIU2: they
// issue freely when both are empty, never when both are busy, and when exactly
// one is busy only if they have no GPR dependency on the instruction in the
// busy stage or on the instructions SCIU/SCIU2 completed this cycle.
static int can_issue(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;
    PCode *busy;

    if (completionbuffers.free == 0)
        return 0;

    // every unit other than SCIU: issue only into an empty stage
    if (unit != SCIU)
        return pipeline[unit].instr ? 0 : 1;

    if (pipeline[SCIU].instr && pipeline[SCIU2].instr)
        return 0;
    if (!pipeline[SCIU].instr && !pipeline[SCIU2].instr)
        return 1;

    // exactly one of the two stages is occupied here
    busy = pipeline[SCIU].instr ? pipeline[SCIU].instr : pipeline[SCIU2].instr;

    if (is_dependent(instr, busy, RegClass_GPR) ||
        is_dependent(instr, sciu_completed_instruction, RegClass_GPR) ||
        is_dependent(instr, sciu2_completed_instruction, RegClass_GPR))
        return 0;

    return 1;
}
// Issues an instruction: allocates a completion buffer entry for it and
// places it in the first stage of its unit with that stage's cycle count.
// An SCIU instruction goes to SCIU2 when SCIU is already occupied.
static void issue(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;
    int firstStageCycles = instruction_timing[instr->op].cycles[0];

    if (unit == SCIU && pipeline[SCIU].instr != NULL)
        unit = SCIU2;

    assign_completion_buffer(instr);

    pipeline[unit].remaining = firstStageCycles;
    pipeline[unit].instr = instr;
}
// Simulates one clock tick of the pipeline model: counts down every occupied
// stage, retires completed instructions from the front of the completion
// buffer queue, completes instructions whose last stage has finished, and
// moves multi-stage instructions forward into empty following stages.
// The order of the steps below matters (e.g. FPU3 is completed before FPU2 is
// allowed to advance into it).
static void advance_clock(void) {
int stage;
int i;
// forget what the SCIU stages completed on the previous tick
sciu_completed_instruction = NULL;
sciu2_completed_instruction = NULL;
// tick down the countdown of every occupied stage that still has cycles left
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
// retire in queue order, at most 5 entries per tick, stopping at the first
// entry that has not completed yet
for (i = 0; i < 5; i++) {
if (completionbuffers.used == 0)
break;
if (completionbuffers.entries[completionbuffers.nextToRetire].completed == 0)
break;
retire_instruction();
}
// stages from which an instruction completes once its countdown hits zero
if (pipeline[SCIU].instr && pipeline[SCIU].remaining == 0)
complete_instruction(SCIU);
if (pipeline[SCIU2].instr && pipeline[SCIU2].remaining == 0)
complete_instruction(SCIU2);
if (pipeline[MCIU].instr && pipeline[MCIU].remaining == 0)
complete_instruction(MCIU);
if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0)
complete_instruction(LSU2);
if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0)
complete_instruction(FPU3);
if (pipeline[BPU].instr && pipeline[BPU].remaining == 0)
complete_instruction(BPU);
// PC_FDIV / PC_FDIVS complete directly from FPU1 instead of moving on to FPU2
if (
pipeline[FPU1].instr &&
pipeline[FPU1].remaining == 0 &&
(pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS)
)
complete_instruction(FPU1);
// move finished instructions to the next stage of their unit, later stages
// first, and only when the destination stage is empty
if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr)
advance(FPU1, FPU2, FPU3);
if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr)
advance(FPU1, FPU1, FPU2);
if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr)
advance(LSU1, LSU1, LSU2);
}
// Returns the "serializes" flag stored in the timing table for the
// instruction's opcode.
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
// Scheduler description exported for this machine model. The pointer fields
// are the pipeline-model callbacks defined in this file, plus the shared
// default_uses_vpermute_unit.
// NOTE(review): the meaning of the three leading numeric fields is fixed by
// the MachineInfo declaration, which is not visible here — confirm against it
// before changing them.
MachineInfo machine604 = {
4,
1,
0,
&latency,
&initialize,
&can_issue,
&issue,
&advance_clock,
&serializes,
&default_uses_vpermute_unit
};

View File

@@ -0,0 +1,744 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://www.nxp.com/docs/en/reference-manual/MPC7410UM.pdf
// Pipeline stages tracked by this scheduler model; values index the pipeline
// array. The stages of a multi-stage unit are consecutive (e.g. FPU1..FPU3),
// which advance() relies on when it computes a stage's offset from the first
// stage of its unit.
typedef enum Stage {
BPU, // Branch Prediction Unit
IU1, // Integer Unit 1
IU2, // Integer Unit 2
LSU1, // Load/Store Unit
LSU2,
FPU1, // Floating Point Unit
FPU2,
FPU3,
SRU, // System Register Unit
VSIU, // Vector Simple Integer Unit
VPU, // AltiVec Permute Unit
VCIU1, // Vector Complex Integer Unit
VCIU2,
VCIU3,
VFPU1, // Vector Floating-Point Unit
VFPU2,
VFPU3,
VFPU4,
NumStages // number of stages; used as the size of the pipeline array
} Stage;
// Occupancy of every pipeline stage, indexed by Stage. A stage is empty when
// its instr pointer is NULL; remaining is only consulted while instr is set.
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Instructions completed from the IU1 / IU2 stages during the current clock
// tick. complete_instruction() records them, advance_clock() clears them at
// the start of each tick, and can_issue() checks candidate integer
// instructions against them for GPR dependencies.
static PCode *iu1_completed_instruction;
static PCode *iu2_completed_instruction;
// Capacity of the circular completion buffer queue (completionbuffers.entries).
enum {
MaxEntries = 8
};
// Completion buffer queue: a circular array that tracks every issued
// instruction from issue until it is retired. Entries are filled at
// nextFreeSlot and released at nextToRetire; both indices wrap at MaxEntries.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the tracked instruction; NULL once the entry has been retired
PCode *instr;
// set to 1 once the instruction has finished its last pipeline stage
int completed;
} entries[MaxEntries];
} completionbuffers;
// Per-opcode timing data for this pipeline model, indexed by PCode opcode
// (instr->op). There is one row per opcode; the trailing comment on each row
// names the opcode it describes.
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage: issue() loads cycles[0], and
// advance() indexes by (new stage - first stage of the unit)
char cycles[4];
// does this instruction serialise?
char serializes;
char unused;
} instruction_timing[OPCODE_MAX] = {
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_B
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BL
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BC
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCLR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCCTR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BT
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTLR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTCTR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BF
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFLR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFCTR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZT
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZF
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZT
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZF
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTRL
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLRL
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZ
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LBZUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZ
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHZUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHA
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHAU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHAX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHAUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LHBRX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZ
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWZUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LWBRX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LMW
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STB
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STBU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STBX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STBUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STH
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STHBRX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STW
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STWBRX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STMW
LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBF
LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBST
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DCBT
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DCBTST
LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBZ
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADD
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDE
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDICR
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIS
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDME
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ADDZE
IU1, 19, 19, 0, 0, 0, 0, 0, // PC_DIVW
IU1, 19, 19, 0, 0, 0, 0, 0, // PC_DIVWU
IU1, 5, 5, 0, 0, 0, 0, 0, // PC_MULHW
IU1, 6, 5, 0, 0, 0, 0, 0, // PC_MULHWU
IU1, 3, 3, 0, 0, 0, 0, 0, // PC_MULLI
IU1, 5, 5, 0, 0, 0, 0, 0, // PC_MULLW
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NEG
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBF
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFE
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFIC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFME
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFZE
IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMPI
IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMP
IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMPLI
IU2, 3, 1, 0, 0, 0, 0, 0, // PC_CMPL
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ANDI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ANDIS
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ORI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ORIS
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_XORI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_XORIS
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_AND
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_OR
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_XOR
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NAND
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NOR
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_EQV
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ANDC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ORC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSB
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSH
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_CNTLZW
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_RLWINM
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_RLWNM
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_RLWIMI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SLW
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SRW
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SRAWI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_SRAW
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRAND
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRANDC
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CREQV
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRNAND
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRNOR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CROR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRORC
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_CRXOR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MCRF
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTXER
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTCTR
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTLR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MTCRF
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MTMSR
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTSPR
SRU, 1, 1, 0, 0, 0, 0, 0, // PC_MFMSR
SRU, 3, 3, 0, 0, 0, 1, 0, // PC_MFSPR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFXER
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFCTR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFLR
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MFCR
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_MFFS
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_MTFSF
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_EIEIO
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_ISYNC
SRU, 3, 3, 0, 0, 0, 1, 0, // PC_SYNC
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_RFI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_LI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_LIS
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_MR
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NOP
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_NOT
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFS
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFSU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFSX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFSUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFD
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFDU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFDX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LFDUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFS
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFSU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFSX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFSUX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFD
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFDU
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFDX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFDUX
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMR
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FABS
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNEG
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNABS
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FADD
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FADDS
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FSUB
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FSUBS
FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FMUL
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMULS
FPU1, 31, 31, 0, 0, 0, 0, 0, // PC_FDIV
FPU1, 17, 17, 0, 0, 0, 0, 0, // PC_FDIVS
FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FMADD
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMADDS
FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FMSUB
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FMSUBS
FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FNMADD
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNMADDS
FPU1, 4, 2, 1, 1, 0, 0, 0, // PC_FNMSUB
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FNMSUBS
FPU1, 10, 10, 0, 0, 0, 0, 0, // PC_FRES
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FRSQRTE
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FSEL
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FRSP
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCTIW
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCTIWZ
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCMPU
FPU1, 3, 1, 1, 1, 0, 0, 0, // PC_FCMPO
LSU1, 2, 1, 1, 0, 0, 1, 0, // PC_LWARX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LSWI
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LSWX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STFIWX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STSWI
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STSWX
LSU1, 2, 1, 1, 0, 0, 1, 0, // PC_STWCX
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ECIWX
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ECOWX
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_DCBI
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ICBI
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MCRFS
SRU, 1, 1, 0, 0, 0, 1, 0, // PC_MCRXR
SRU, 1, 1, 0, 0, 0, 0, 0, // PC_MFTB
SRU, 3, 3, 0, 0, 0, 0, 0, // PC_MFSR
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTSR
SRU, 3, 3, 0, 0, 0, 0, 0, // PC_MFSRIN
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_MTSRIN
FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_MTFSB0
FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_MTFSB1
FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_MTFSFI
SRU, 2, 2, 0, 0, 0, 1, 0, // PC_SC
FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRT
FPU1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRTS
LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIA
LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIE
LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBLD
LSU1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBLI
LSU1, 1, 1, 0, 0, 0, 1, 0, // PC_TLBSYNC
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_TW
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_TRAP
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_TWI
IU2, 1, 1, 0, 0, 0, 1, 0, // PC_OPWORD
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_MFROM
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_DSA
IU2, 1, 1, 0, 0, 0, 0, 0, // PC_ESA
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_DCCCI
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_DCREAD
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_ICBT
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_ICCCI
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_ICREAD
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_RFCI
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_TLBRE
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_TLBSX
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_TLBWE
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEE
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEEI
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_MFDCR
IU2, 1, 0, 0, 0, 0, 0, 0, // PC_MTDCR
LSU1, 3, 1, 2, 0, 0, 0, 0, // PC_DCBA
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSS
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSSALL
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DST
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSTT
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSTST
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_DSTSTT
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVEBX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVEHX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVEWX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVSL
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVSR
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_LVXL
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVEBX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVEHX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVEWX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVX
LSU1, 2, 1, 1, 0, 0, 0, 0, // PC_STVXL
VSIU, 1, 1, 0, 0, 0, 1, 0, // PC_MFVSCR
VSIU, 1, 1, 0, 0, 0, 1, 0, // PC_MTVSCR
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDCUW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VADDFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSBS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSHS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSWS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAND
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VANDC
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFSX
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFUX
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPBFP
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPEQFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPGEFP
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCMPGTFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTSXS
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTUXS
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VEXPTEFP
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VLOGEFP
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VMAXFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VMINFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUW
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGHB
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGHH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGHW
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGLB
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGLH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRGLW
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULESB
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULESH
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULEUB
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULEUH
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOSB
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOSH
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOUB
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMULOUH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VNOR
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VOR
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKPX
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSHSS
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSHUS
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSWSS
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKSWUS
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUHUM
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUHUS
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUWUM
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPKUWUS
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VREFP
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIM
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIN
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIP
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIZ
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VRLB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VRLH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VRLW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VRSQRTEFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSL
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLO
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLW
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTB
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTW
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTISB
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTISH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSPLTISW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSR
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRB
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRO
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSRW
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBCUW
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUBFP
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSBS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSHS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSWS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHS
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUMSWS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM2SWS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM4SBS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM4SHS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VSUM4UBS
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKHPX
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKHSB
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKHSH
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKLPX
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKLSB
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VUPKLSH
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VXOR
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VMADDFP
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMHADDSHS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMHRADDSHS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMLADDUHM
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMMBM
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMSHM
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMSHS
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMUBM
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMUHM
VCIU1, 3, 1, 1, 1, 0, 0, 0, // PC_VMSUMUHS
VFPU1, 4, 1, 1, 1, 1, 0, 0, // PC_VNMSUBFP
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VPERM
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VSEL
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VSLDOI
VSIU, 1, 1, 0, 0, 0, 0, 0, // PC_VMR
VPU, 1, 1, 0, 0, 0, 0, 0, // PC_VMRP
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLE
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLEQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLIQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLLIQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLLQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SLQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRAIQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRAQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRE
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SREA
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SREQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRIQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRLIQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRLQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_SRQ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_MASKG
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_MASKIR
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_LSCBX
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DIV
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DIVS
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DOZ
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_MUL
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_NABS
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_ABS
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_CLCS
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_DOZI
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_RLMI
BPU, 0, 0, 0, 0, 0, 0, 0, // PC_RRIB
};
// Hands the instruction in pipeline[oldStage] over to pipeline[newStage].
// The countdown for the new stage comes from the opcode's per-stage cycle
// table, indexed by how far newStage lies from firstStage (the first stage of
// the unit). The old stage ends up empty.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline[oldStage].instr;
    int offset = newStage - firstStage;
    int countdown = instruction_timing[moving->op].cycles[offset];

    pipeline[newStage].remaining = countdown;
    pipeline[newStage].instr = moving;

    // the source stage is emptied last, as before
    pipeline[oldStage].instr = NULL;
}
// Claims the completion buffer entry at nextFreeSlot for a newly issued
// instruction, marks it as not completed, then steps the free-slot index
// (wrapping at MaxEntries) and adjusts the used/free counters.
static void assign_completion_buffer(PCode *instr) {
    unsigned int slot = completionbuffers.nextFreeSlot;

    completionbuffers.entries[slot].instr = instr;
    completionbuffers.entries[slot].completed = 0;
    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;

    completionbuffers.free -= 1;
    completionbuffers.used += 1;
}
// Finishes the instruction currently occupying pipeline[stage]:
//  - flags its completion buffer entry as completed so it can be retired,
//  - empties the pipeline stage,
//  - remembers it as the instruction most recently completed by IU1/IU2 when
//    it came from one of those stages.
static void complete_instruction(int stage) {
    PCode *instr = pipeline[stage].instr;
    int buf;

    // find the completion buffer entry that tracks this instruction
    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr)
            break;
    }

    // Only touch the entry when the search succeeded. Previously a failed
    // search left buf == MaxEntries and the store below wrote one element
    // past the end of the entries array.
    if (buf < MaxEntries)
        completionbuffers.entries[buf].completed = 1;

    pipeline[stage].instr = NULL;

    if (stage == IU1)
        iu1_completed_instruction = instr;
    else if (stage == IU2)
        iu2_completed_instruction = instr;
}
// Frees the completion buffer entry at nextToRetire: clears its instruction
// pointer, steps the retire index (wrapping at MaxEntries), and adjusts the
// used/free counters.
static void retire_instruction(void) {
    unsigned int oldest = completionbuffers.nextToRetire;

    completionbuffers.entries[oldest].instr = NULL;
    completionbuffers.nextToRetire = (oldest + 1) % MaxEntries;

    completionbuffers.free += 1;
    completionbuffers.used -= 1;
}
// Computes the latency in cycles of an instruction: the table latency for its
// opcode, plus 2 when the record bit flag is set, plus (argCount - 2) for
// PC_LMW and PC_STMW.
static int latency(PCode *instr) {
    int total = instruction_timing[instr->op].latency;
    int isMultiple = (instr->op == PC_LMW || instr->op == PC_STMW);

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        total += 2;

    if (isMultiple)
        total += instr->argCount - 2;

    return total;
}
// Puts the pipeline model into its starting state: all stages empty, the
// completion buffer queue entirely free with both indices at zero, and no
// instruction recorded as completed by IU1/IU2.
static void initialize(void) {
    int idx;

    idx = 0;
    while (idx < NumStages) {
        pipeline[idx].instr = NULL;
        idx++;
    }

    completionbuffers.free = MaxEntries;
    completionbuffers.used = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.nextFreeSlot = 0;

    idx = 0;
    while (idx < MaxEntries) {
        completionbuffers.entries[idx].instr = NULL;
        idx++;
    }

    iu1_completed_instruction = NULL;
    iu2_completed_instruction = NULL;
}
// Decides whether an instruction can be issued this cycle (1) or not (0).
//  - Nothing issues when no completion buffer is free.
//  - IU2-class instructions may use IU1 or IU2: they issue immediately when
//    both are empty, never when both are busy, and otherwise only without a
//    GPR dependency on the busy stage's instruction or on the instructions
//    IU1/IU2 completed this cycle.
//  - VPU instructions need an empty VPU and no VR dependency on the first
//    busy stage among VFPU1, VCIU1, VSIU.
//  - VFPU1/VCIU1/VSIU instructions need all three of those stages empty and
//    no VR dependency on an instruction in VPU.
//  - Everything else needs its own first stage to be empty.
// Finally, a write instruction is held back while LSU2 holds another write.
static int can_issue(PCode *instr) {
    int unit;
    PCode *other;

    if (completionbuffers.free == 0)
        return 0;

    unit = instruction_timing[instr->op].stage;
    switch (unit) {
    case IU2:
        if (pipeline[IU1].instr && pipeline[IU2].instr)
            return 0;
        // both integer units idle: issue without any further checks
        if (!pipeline[IU1].instr && !pipeline[IU2].instr)
            return 1;

        // exactly one integer unit is busy here
        other = pipeline[IU1].instr ? pipeline[IU1].instr : pipeline[IU2].instr;
        if (is_dependent(instr, other, RegClass_GPR) ||
            is_dependent(instr, iu1_completed_instruction, RegClass_GPR) ||
            is_dependent(instr, iu2_completed_instruction, RegClass_GPR))
            return 0;
        break;

    case VPU:
        if (pipeline[VPU].instr)
            return 0;

        if (pipeline[VFPU1].instr)
            other = pipeline[VFPU1].instr;
        else if (pipeline[VCIU1].instr)
            other = pipeline[VCIU1].instr;
        else if (pipeline[VSIU].instr)
            other = pipeline[VSIU].instr;
        else
            other = NULL;

        if (is_dependent(instr, other, RegClass_VR))
            return 0;
        break;

    case VFPU1:
    case VCIU1:
    case VSIU:
        if (pipeline[VFPU1].instr || pipeline[VCIU1].instr || pipeline[VSIU].instr)
            return 0;
        if (pipeline[VPU].instr && is_dependent(instr, pipeline[VPU].instr, RegClass_VR))
            return 0;
        break;

    default:
        if (pipeline[unit].instr)
            return 0;
        break;
    }

    if (instr->flags & fIsWrite) {
        PCode *inLsu2 = pipeline[LSU2].instr;

        if (inLsu2 && (inLsu2->flags & fIsWrite))
            return 0;
    }

    return 1;
}
// Issues `instr`: reserves a completion buffer entry for it and places it in
// its initial pipeline stage with the cycle count of that first stage.
// An IU2-class instruction is placed in IU1 instead whenever IU1 is idle.
static void issue(PCode *instr) {
    int firstCycles = instruction_timing[instr->op].cycles[0];
    int unit = instruction_timing[instr->op].stage;

    assign_completion_buffer(instr);

    if (unit == IU2) {
        if (!pipeline[IU1].instr)
            unit = IU1;
    }

    pipeline[unit].remaining = firstCycles;
    pipeline[unit].instr = instr;
}
// Advances the simulated machine by one clock cycle.
//
// The work is done in a fixed order:
//   1. forget the integer-unit completions recorded by the previous call;
//   2. count down every occupied stage that still has cycles remaining;
//   3. retire up to two completed instructions from the retirement position;
//   4. complete instructions whose last stage has finished;
//   5. move instructions along the multi-stage units, checking each unit from
//      its last stage back to its first.
// Because step 3 runs before step 4, an instruction completed by this call is
// retired by a later call at the earliest.
static void advance_clock(void) {
int stage;
iu1_completed_instruction = NULL;
iu2_completed_instruction = NULL;
// count down the stages that are still busy
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
// retire at most two instructions per cycle, in buffer order
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
}
}
// stages from which an instruction completes once its count reaches zero
if (pipeline[IU1].instr && pipeline[IU1].remaining == 0)
complete_instruction(IU1);
if (pipeline[VPU].instr && pipeline[VPU].remaining == 0)
complete_instruction(VPU);
if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0)
complete_instruction(LSU2);
if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0)
complete_instruction(FPU3);
if (pipeline[SRU].instr && pipeline[SRU].remaining == 0)
complete_instruction(SRU);
if (pipeline[BPU].instr && pipeline[BPU].remaining == 0)
complete_instruction(BPU);
if (pipeline[VSIU].instr && pipeline[VSIU].remaining == 0)
complete_instruction(VSIU);
if (pipeline[VCIU3].instr && pipeline[VCIU3].remaining == 0)
complete_instruction(VCIU3);
if (pipeline[VFPU4].instr && pipeline[VFPU4].remaining == 0)
complete_instruction(VFPU4);
if (pipeline[IU2].instr && pipeline[IU2].remaining == 0)
complete_instruction(IU2);
// FDIV and FDIVS complete directly from FPU1 rather than moving on to FPU2
if (
pipeline[FPU1].instr &&
pipeline[FPU1].remaining == 0 &&
(pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS)
)
complete_instruction(FPU1);
// move finished instructions into the next stage when that stage is empty;
// each unit is checked last stage first
// NOTE(review): this ordering presumably lets an instruction follow one that
// moved on in the same cycle, assuming advance() vacates the old stage - confirm
if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr)
advance(FPU1, FPU2, FPU3);
if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr)
advance(FPU1, FPU1, FPU2);
if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr)
advance(LSU1, LSU1, LSU2);
if (pipeline[VCIU2].instr && pipeline[VCIU2].remaining == 0 && !pipeline[VCIU3].instr)
advance(VCIU1, VCIU2, VCIU3);
if (pipeline[VCIU1].instr && pipeline[VCIU1].remaining == 0 && !pipeline[VCIU2].instr)
advance(VCIU1, VCIU1, VCIU2);
if (pipeline[VFPU3].instr && pipeline[VFPU3].remaining == 0 && !pipeline[VFPU4].instr)
advance(VFPU1, VFPU3, VFPU4);
if (pipeline[VFPU2].instr && pipeline[VFPU2].remaining == 0 && !pipeline[VFPU3].instr)
advance(VFPU1, VFPU2, VFPU3);
if (pipeline[VFPU1].instr && pipeline[VFPU1].remaining == 0 && !pipeline[VFPU2].instr)
advance(VFPU1, VFPU1, VFPU2);
}
// Returns the `serializes` flag recorded for this instruction's opcode in
// instruction_timing[] (non-zero when the opcode is marked as serialising).
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
// Returns 1 when the opcode's initial stage in instruction_timing[] is the
// VPU, and 0 otherwise.
static int uses_vpermute_unit_7400(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;

    return unit == VPU;
}
// Scheduler description exported by this file: three integer parameters
// followed by this file's callbacks, in the field order of MachineInfo.
// NOTE(review): the meaning of the three leading integers is defined by the
// MachineInfo declaration, which is not visible here - confirm before editing.
MachineInfo machine7400 = {
    2, 1, 0,
    latency,
    initialize,
    can_issue,
    issue,
    advance_clock,
    serializes,
    uses_vpermute_unit_7400
};

View File

@@ -0,0 +1,678 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://www.nxp.com/docs/en/reference-manual/MPC750UM.pdf
// Pipeline stages tracked by this scheduler model. The values index
// pipeline[], and NumStages (the last enumerator) is the number of stages.
// Units with several stages are numbered consecutively (LSU1-LSU2,
// FPU1-FPU3); advance() relies on this when it computes newStage - firstStage.
typedef enum Stage {
BPU, // Branch Prediction Unit
IU1, // Integer Unit 1
IU2, // Integer Unit 2
LSU1, // Load/Store Unit
LSU2,
FPU1, // Floating Point Unit
FPU2,
FPU3,
SRU, // System Register Unit
NumStages
} Stage;
// Current contents of each pipeline stage, indexed by Stage.
// A NULL instr means the stage is empty.
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Instructions completed out of IU1 / IU2 by the most recent advance_clock()
// call (NULL if none). advance_clock() clears both at the start of each call,
// and can_issue() checks new integer instructions against them.
static PCode *iu1_completed_instruction;
static PCode *iu2_completed_instruction;
// Number of slots in the completion buffer queue (completionbuffers.entries).
enum {
MaxEntries = 6
};
// Completion queue: a circular buffer of MaxEntries slots. Slots are handed
// out at nextFreeSlot when an instruction is issued and released at
// nextToRetire when it is retired; both indices wrap modulo MaxEntries.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the instruction tracked by this slot (NULL once retired)
PCode *instr;
// set to 1 by complete_instruction(), reset to 0 when the slot is assigned
int completed;
} entries[MaxEntries];
} completionbuffers;
// Per-opcode timing data for this scheduler model, indexed by instr->op.
// The initialiser is purely positional (no nested braces): there is one row
// per opcode, in the order named by the trailing comments, and each row gives
//   stage, latency, cycles[0], cycles[1], cycles[2], serializes
// Inserting, removing or reordering a row therefore shifts every opcode after it.
// Rows from PC_DSS to the end of the table are all `BPU, 0, 0, 0, 0, 0`.
// NOTE(review): presumably placeholders for opcodes this model does not time - confirm.
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[3];
// does this instruction serialise?
char serializes;
} instruction_timing[OPCODE_MAX] = {
BPU, 0, 0, 0, 0, 1, // PC_B
BPU, 0, 0, 0, 0, 1, // PC_BL
BPU, 0, 0, 0, 0, 1, // PC_BC
BPU, 0, 0, 0, 0, 1, // PC_BCLR
BPU, 0, 0, 0, 0, 1, // PC_BCCTR
BPU, 0, 0, 0, 0, 1, // PC_BT
BPU, 0, 0, 0, 0, 1, // PC_BTLR
BPU, 0, 0, 0, 0, 1, // PC_BTCTR
BPU, 0, 0, 0, 0, 1, // PC_BF
BPU, 0, 0, 0, 0, 1, // PC_BFLR
BPU, 0, 0, 0, 0, 1, // PC_BFCTR
BPU, 0, 0, 0, 0, 1, // PC_BDNZ
BPU, 0, 0, 0, 0, 1, // PC_BDNZT
BPU, 0, 0, 0, 0, 1, // PC_BDNZF
BPU, 0, 0, 0, 0, 1, // PC_BDZ
BPU, 0, 0, 0, 0, 1, // PC_BDZT
BPU, 0, 0, 0, 0, 1, // PC_BDZF
BPU, 0, 0, 0, 0, 1, // PC_BLR
BPU, 0, 0, 0, 0, 1, // PC_BCTR
BPU, 0, 0, 0, 0, 1, // PC_BCTRL
BPU, 0, 0, 0, 0, 1, // PC_BLRL
LSU1, 2, 1, 1, 0, 0, // PC_LBZ
LSU1, 2, 1, 1, 0, 0, // PC_LBZU
LSU1, 2, 1, 1, 0, 0, // PC_LBZX
LSU1, 2, 1, 1, 0, 0, // PC_LBZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHZ
LSU1, 2, 1, 1, 0, 0, // PC_LHZU
LSU1, 2, 1, 1, 0, 0, // PC_LHZX
LSU1, 2, 1, 1, 0, 0, // PC_LHZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHA
LSU1, 2, 1, 1, 0, 0, // PC_LHAU
LSU1, 2, 1, 1, 0, 0, // PC_LHAX
LSU1, 2, 1, 1, 0, 0, // PC_LHAUX
LSU1, 2, 1, 1, 0, 0, // PC_LHBRX
LSU1, 2, 1, 1, 0, 0, // PC_LWZ
LSU1, 2, 1, 1, 0, 0, // PC_LWZU
LSU1, 2, 1, 1, 0, 0, // PC_LWZX
LSU1, 2, 1, 1, 0, 0, // PC_LWZUX
LSU1, 2, 1, 1, 0, 0, // PC_LWBRX
LSU1, 2, 1, 1, 0, 0, // PC_LMW
LSU1, 2, 1, 1, 0, 0, // PC_STB
LSU1, 2, 1, 1, 0, 0, // PC_STBU
LSU1, 2, 1, 1, 0, 0, // PC_STBX
LSU1, 2, 1, 1, 0, 0, // PC_STBUX
LSU1, 2, 1, 1, 0, 0, // PC_STH
LSU1, 2, 1, 1, 0, 0, // PC_STHU
LSU1, 2, 1, 1, 0, 0, // PC_STHX
LSU1, 2, 1, 1, 0, 0, // PC_STHUX
LSU1, 2, 1, 1, 0, 0, // PC_STHBRX
LSU1, 2, 1, 1, 0, 0, // PC_STW
LSU1, 2, 1, 1, 0, 0, // PC_STWU
LSU1, 2, 1, 1, 0, 0, // PC_STWX
LSU1, 2, 1, 1, 0, 0, // PC_STWUX
LSU1, 2, 1, 1, 0, 0, // PC_STWBRX
LSU1, 2, 1, 1, 0, 0, // PC_STMW
LSU1, 3, 1, 2, 0, 0, // PC_DCBF
LSU1, 3, 1, 2, 0, 0, // PC_DCBST
LSU1, 2, 1, 1, 0, 0, // PC_DCBT
LSU1, 2, 1, 1, 0, 0, // PC_DCBTST
LSU1, 3, 1, 2, 0, 0, // PC_DCBZ
IU2, 1, 1, 0, 0, 0, // PC_ADD
IU2, 1, 1, 0, 0, 0, // PC_ADDC
IU2, 1, 1, 0, 0, 0, // PC_ADDE
IU2, 1, 1, 0, 0, 0, // PC_ADDI
IU2, 1, 1, 0, 0, 0, // PC_ADDIC
IU2, 1, 1, 0, 0, 0, // PC_ADDICR
IU2, 1, 1, 0, 0, 0, // PC_ADDIS
IU2, 1, 1, 0, 0, 0, // PC_ADDME
IU2, 1, 1, 0, 0, 0, // PC_ADDZE
IU1, 19, 19, 0, 0, 0, // PC_DIVW
IU1, 19, 19, 0, 0, 0, // PC_DIVWU
IU1, 5, 5, 0, 0, 0, // PC_MULHW
IU1, 6, 5, 0, 0, 0, // PC_MULHWU
IU1, 3, 3, 0, 0, 0, // PC_MULLI
IU1, 5, 5, 0, 0, 0, // PC_MULLW
IU2, 1, 1, 0, 0, 0, // PC_NEG
IU2, 1, 1, 0, 0, 0, // PC_SUBF
IU2, 1, 1, 0, 0, 0, // PC_SUBFC
IU2, 1, 1, 0, 0, 0, // PC_SUBFE
IU2, 1, 1, 0, 0, 0, // PC_SUBFIC
IU2, 1, 1, 0, 0, 0, // PC_SUBFME
IU2, 1, 1, 0, 0, 0, // PC_SUBFZE
IU2, 3, 1, 0, 0, 0, // PC_CMPI
IU2, 3, 1, 0, 0, 0, // PC_CMP
IU2, 3, 1, 0, 0, 0, // PC_CMPLI
IU2, 3, 1, 0, 0, 0, // PC_CMPL
IU2, 1, 1, 0, 0, 0, // PC_ANDI
IU2, 1, 1, 0, 0, 0, // PC_ANDIS
IU2, 1, 1, 0, 0, 0, // PC_ORI
IU2, 1, 1, 0, 0, 0, // PC_ORIS
IU2, 1, 1, 0, 0, 0, // PC_XORI
IU2, 1, 1, 0, 0, 0, // PC_XORIS
IU2, 1, 1, 0, 0, 0, // PC_AND
IU2, 1, 1, 0, 0, 0, // PC_OR
IU2, 1, 1, 0, 0, 0, // PC_XOR
IU2, 1, 1, 0, 0, 0, // PC_NAND
IU2, 1, 1, 0, 0, 0, // PC_NOR
IU2, 1, 1, 0, 0, 0, // PC_EQV
IU2, 1, 1, 0, 0, 0, // PC_ANDC
IU2, 1, 1, 0, 0, 0, // PC_ORC
IU2, 1, 1, 0, 0, 0, // PC_EXTSB
IU2, 1, 1, 0, 0, 0, // PC_EXTSH
IU2, 1, 1, 0, 0, 0, // PC_CNTLZW
IU2, 1, 1, 0, 0, 0, // PC_RLWINM
IU2, 1, 1, 0, 0, 0, // PC_RLWNM
IU2, 1, 1, 0, 0, 0, // PC_RLWIMI
IU2, 1, 1, 0, 0, 0, // PC_SLW
IU2, 1, 1, 0, 0, 0, // PC_SRW
IU2, 1, 1, 0, 0, 0, // PC_SRAWI
IU2, 1, 1, 0, 0, 0, // PC_SRAW
SRU, 1, 1, 0, 0, 1, // PC_CRAND
SRU, 1, 1, 0, 0, 1, // PC_CRANDC
SRU, 1, 1, 0, 0, 1, // PC_CREQV
SRU, 1, 1, 0, 0, 1, // PC_CRNAND
SRU, 1, 1, 0, 0, 1, // PC_CRNOR
SRU, 1, 1, 0, 0, 1, // PC_CROR
SRU, 1, 1, 0, 0, 1, // PC_CRORC
SRU, 1, 1, 0, 0, 1, // PC_CRXOR
SRU, 1, 1, 0, 0, 1, // PC_MCRF
SRU, 2, 2, 0, 0, 1, // PC_MTXER
SRU, 2, 2, 0, 0, 1, // PC_MTCTR
SRU, 2, 2, 0, 0, 1, // PC_MTLR
SRU, 1, 1, 0, 0, 1, // PC_MTCRF
SRU, 1, 1, 0, 0, 0, // PC_MTMSR
SRU, 1, 1, 0, 0, 1, // PC_MTSPR
SRU, 1, 1, 0, 0, 1, // PC_MFMSR
SRU, 1, 1, 0, 0, 1, // PC_MFSPR
SRU, 1, 1, 0, 0, 1, // PC_MFXER
SRU, 1, 1, 0, 0, 1, // PC_MFCTR
SRU, 1, 1, 0, 0, 1, // PC_MFLR
SRU, 1, 1, 0, 0, 1, // PC_MFCR
FPU1, 3, 1, 1, 1, 0, // PC_MFFS
FPU1, 3, 1, 1, 1, 0, // PC_MTFSF
SRU, 1, 1, 0, 0, 1, // PC_EIEIO
SRU, 2, 2, 0, 0, 1, // PC_ISYNC
SRU, 3, 3, 0, 0, 1, // PC_SYNC
SRU, 1, 1, 0, 0, 1, // PC_RFI
IU2, 1, 1, 0, 0, 0, // PC_LI
IU2, 1, 1, 0, 0, 0, // PC_LIS
IU2, 1, 1, 0, 0, 0, // PC_MR
IU2, 1, 1, 0, 0, 0, // PC_NOP
IU2, 1, 1, 0, 0, 0, // PC_NOT
LSU1, 2, 1, 1, 0, 0, // PC_LFS
LSU1, 2, 1, 1, 0, 0, // PC_LFSU
LSU1, 2, 1, 1, 0, 0, // PC_LFSX
LSU1, 2, 1, 1, 0, 0, // PC_LFSUX
LSU1, 2, 1, 1, 0, 0, // PC_LFD
LSU1, 2, 1, 1, 0, 0, // PC_LFDU
LSU1, 2, 1, 1, 0, 0, // PC_LFDX
LSU1, 2, 1, 1, 0, 0, // PC_LFDUX
LSU1, 2, 1, 1, 0, 0, // PC_STFS
LSU1, 2, 1, 1, 0, 0, // PC_STFSU
LSU1, 2, 1, 1, 0, 0, // PC_STFSX
LSU1, 2, 1, 1, 0, 0, // PC_STFSUX
LSU1, 2, 1, 1, 0, 0, // PC_STFD
LSU1, 2, 1, 1, 0, 0, // PC_STFDU
LSU1, 2, 1, 1, 0, 0, // PC_STFDX
LSU1, 2, 1, 1, 0, 0, // PC_STFDUX
FPU1, 3, 1, 1, 1, 0, // PC_FMR
FPU1, 3, 1, 1, 1, 0, // PC_FABS
FPU1, 3, 1, 1, 1, 0, // PC_FNEG
FPU1, 3, 1, 1, 1, 0, // PC_FNABS
FPU1, 3, 1, 1, 1, 0, // PC_FADD
FPU1, 3, 1, 1, 1, 0, // PC_FADDS
FPU1, 3, 1, 1, 1, 0, // PC_FSUB
FPU1, 3, 1, 1, 1, 0, // PC_FSUBS
FPU1, 4, 2, 1, 1, 0, // PC_FMUL
FPU1, 3, 1, 1, 1, 0, // PC_FMULS
FPU1, 31, 31, 0, 0, 0, // PC_FDIV
FPU1, 17, 17, 0, 0, 0, // PC_FDIVS
FPU1, 4, 2, 1, 1, 0, // PC_FMADD
FPU1, 3, 1, 1, 1, 0, // PC_FMADDS
FPU1, 4, 2, 1, 1, 0, // PC_FMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FMSUBS
FPU1, 4, 2, 1, 1, 0, // PC_FNMADD
FPU1, 3, 1, 1, 1, 0, // PC_FNMADDS
FPU1, 4, 2, 1, 1, 0, // PC_FNMSUB
FPU1, 3, 1, 1, 1, 0, // PC_FNMSUBS
FPU1, 10, 10, 0, 0, 0, // PC_FRES
FPU1, 3, 1, 1, 1, 0, // PC_FRSQRTE
FPU1, 3, 1, 1, 1, 0, // PC_FSEL
FPU1, 3, 1, 1, 1, 0, // PC_FRSP
FPU1, 3, 1, 1, 1, 0, // PC_FCTIW
FPU1, 3, 1, 1, 1, 0, // PC_FCTIWZ
FPU1, 3, 1, 1, 1, 0, // PC_FCMPU
FPU1, 3, 1, 1, 1, 0, // PC_FCMPO
LSU1, 1, 1, 0, 0, 0, // PC_LWARX
LSU1, 1, 1, 0, 0, 0, // PC_LSWI
LSU1, 1, 1, 0, 0, 0, // PC_LSWX
LSU1, 1, 1, 0, 0, 0, // PC_STFIWX
LSU1, 1, 1, 0, 0, 0, // PC_STSWI
LSU1, 1, 1, 0, 0, 0, // PC_STSWX
LSU1, 1, 1, 0, 0, 0, // PC_STWCX
IU1, 1, 1, 0, 0, 1, // PC_ECIWX
IU1, 1, 1, 0, 0, 1, // PC_ECOWX
IU1, 1, 1, 0, 0, 0, // PC_DCBI
IU1, 1, 1, 0, 0, 0, // PC_ICBI
IU1, 1, 1, 0, 0, 0, // PC_MCRFS
IU1, 1, 1, 0, 0, 0, // PC_MCRXR
IU1, 1, 1, 0, 0, 0, // PC_MFTB
IU1, 1, 1, 0, 0, 0, // PC_MFSR
IU1, 1, 1, 0, 0, 0, // PC_MTSR
IU1, 1, 1, 0, 0, 0, // PC_MFSRIN
IU1, 1, 1, 0, 0, 0, // PC_MTSRIN
IU1, 1, 1, 0, 0, 0, // PC_MTFSB0
IU1, 1, 1, 0, 0, 0, // PC_MTFSB1
IU1, 1, 1, 0, 0, 0, // PC_MTFSFI
IU1, 1, 1, 0, 0, 1, // PC_SC
FPU1, 1, 1, 0, 0, 0, // PC_FSQRT
FPU1, 1, 1, 0, 0, 0, // PC_FSQRTS
IU1, 1, 1, 0, 0, 0, // PC_TLBIA
IU1, 1, 1, 0, 0, 0, // PC_TLBIE
IU1, 1, 1, 0, 0, 0, // PC_TLBLD
IU1, 1, 1, 0, 0, 0, // PC_TLBLI
IU1, 1, 1, 0, 0, 0, // PC_TLBSYNC
IU1, 1, 1, 0, 0, 1, // PC_TW
IU1, 1, 1, 0, 0, 1, // PC_TRAP
IU1, 1, 1, 0, 0, 1, // PC_TWI
IU1, 1, 1, 0, 0, 1, // PC_OPWORD
IU1, 1, 1, 0, 0, 0, // PC_MFROM
IU1, 1, 1, 0, 0, 1, // PC_DSA
IU1, 1, 1, 0, 0, 1, // PC_ESA
IU1, 0, 0, 0, 0, 0, // PC_DCCCI
IU1, 0, 0, 0, 0, 0, // PC_DCREAD
IU1, 0, 0, 0, 0, 0, // PC_ICBT
IU1, 0, 0, 0, 0, 0, // PC_ICCCI
IU1, 0, 0, 0, 0, 0, // PC_ICREAD
IU1, 0, 0, 0, 0, 0, // PC_RFCI
IU1, 0, 0, 0, 0, 0, // PC_TLBRE
IU1, 0, 0, 0, 0, 0, // PC_TLBSX
IU1, 0, 0, 0, 0, 0, // PC_TLBWE
IU1, 0, 0, 0, 0, 0, // PC_WRTEE
IU1, 0, 0, 0, 0, 0, // PC_WRTEEI
IU1, 0, 0, 0, 0, 0, // PC_MFDCR
IU1, 0, 0, 0, 0, 0, // PC_MTDCR
IU1, 0, 0, 0, 0, 0, // PC_DCBA
BPU, 0, 0, 0, 0, 0, // PC_DSS
BPU, 0, 0, 0, 0, 0, // PC_DSSALL
BPU, 0, 0, 0, 0, 0, // PC_DST
BPU, 0, 0, 0, 0, 0, // PC_DSTT
BPU, 0, 0, 0, 0, 0, // PC_DSTST
BPU, 0, 0, 0, 0, 0, // PC_DSTSTT
BPU, 0, 0, 0, 0, 0, // PC_LVEBX
BPU, 0, 0, 0, 0, 0, // PC_LVEHX
BPU, 0, 0, 0, 0, 0, // PC_LVEWX
BPU, 0, 0, 0, 0, 0, // PC_LVSL
BPU, 0, 0, 0, 0, 0, // PC_LVSR
BPU, 0, 0, 0, 0, 0, // PC_LVX
BPU, 0, 0, 0, 0, 0, // PC_LVXL
BPU, 0, 0, 0, 0, 0, // PC_STVEBX
BPU, 0, 0, 0, 0, 0, // PC_STVEHX
BPU, 0, 0, 0, 0, 0, // PC_STVEWX
BPU, 0, 0, 0, 0, 0, // PC_STVX
BPU, 0, 0, 0, 0, 0, // PC_STVXL
BPU, 0, 0, 0, 0, 0, // PC_MFVSCR
BPU, 0, 0, 0, 0, 0, // PC_MTVSCR
BPU, 0, 0, 0, 0, 0, // PC_VADDCUW
BPU, 0, 0, 0, 0, 0, // PC_VADDFP
BPU, 0, 0, 0, 0, 0, // PC_VADDSBS
BPU, 0, 0, 0, 0, 0, // PC_VADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VADDSWS
BPU, 0, 0, 0, 0, 0, // PC_VADDUBM
BPU, 0, 0, 0, 0, 0, // PC_VADDUBS
BPU, 0, 0, 0, 0, 0, // PC_VADDUHM
BPU, 0, 0, 0, 0, 0, // PC_VADDUHS
BPU, 0, 0, 0, 0, 0, // PC_VADDUWM
BPU, 0, 0, 0, 0, 0, // PC_VADDUWS
BPU, 0, 0, 0, 0, 0, // PC_VAND
BPU, 0, 0, 0, 0, 0, // PC_VANDC
BPU, 0, 0, 0, 0, 0, // PC_VAVGSB
BPU, 0, 0, 0, 0, 0, // PC_VAVGSH
BPU, 0, 0, 0, 0, 0, // PC_VAVGSW
BPU, 0, 0, 0, 0, 0, // PC_VAVGUB
BPU, 0, 0, 0, 0, 0, // PC_VAVGUH
BPU, 0, 0, 0, 0, 0, // PC_VAVGUW
BPU, 0, 0, 0, 0, 0, // PC_VCFSX
BPU, 0, 0, 0, 0, 0, // PC_VCFUX
BPU, 0, 0, 0, 0, 0, // PC_VCMPBFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUB
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUH
BPU, 0, 0, 0, 0, 0, // PC_VCMPEQUW
BPU, 0, 0, 0, 0, 0, // PC_VCMPGEFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTFP
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSB
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSH
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTSW
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUB
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUH
BPU, 0, 0, 0, 0, 0, // PC_VCMPGTUW
BPU, 0, 0, 0, 0, 0, // PC_VCTSXS
BPU, 0, 0, 0, 0, 0, // PC_VCTUXS
BPU, 0, 0, 0, 0, 0, // PC_VEXPTEFP
BPU, 0, 0, 0, 0, 0, // PC_VLOGEFP
BPU, 0, 0, 0, 0, 0, // PC_VMAXFP
BPU, 0, 0, 0, 0, 0, // PC_VMAXSB
BPU, 0, 0, 0, 0, 0, // PC_VMAXSH
BPU, 0, 0, 0, 0, 0, // PC_VMAXSW
BPU, 0, 0, 0, 0, 0, // PC_VMAXUB
BPU, 0, 0, 0, 0, 0, // PC_VMAXUH
BPU, 0, 0, 0, 0, 0, // PC_VMAXUW
BPU, 0, 0, 0, 0, 0, // PC_VMINFP
BPU, 0, 0, 0, 0, 0, // PC_VMINSB
BPU, 0, 0, 0, 0, 0, // PC_VMINSH
BPU, 0, 0, 0, 0, 0, // PC_VMINSW
BPU, 0, 0, 0, 0, 0, // PC_VMINUB
BPU, 0, 0, 0, 0, 0, // PC_VMINUH
BPU, 0, 0, 0, 0, 0, // PC_VMINUW
BPU, 0, 0, 0, 0, 0, // PC_VMRGHB
BPU, 0, 0, 0, 0, 0, // PC_VMRGHH
BPU, 0, 0, 0, 0, 0, // PC_VMRGHW
BPU, 0, 0, 0, 0, 0, // PC_VMRGLB
BPU, 0, 0, 0, 0, 0, // PC_VMRGLH
BPU, 0, 0, 0, 0, 0, // PC_VMRGLW
BPU, 0, 0, 0, 0, 0, // PC_VMULESB
BPU, 0, 0, 0, 0, 0, // PC_VMULESH
BPU, 0, 0, 0, 0, 0, // PC_VMULEUB
BPU, 0, 0, 0, 0, 0, // PC_VMULEUH
BPU, 0, 0, 0, 0, 0, // PC_VMULOSB
BPU, 0, 0, 0, 0, 0, // PC_VMULOSH
BPU, 0, 0, 0, 0, 0, // PC_VMULOUB
BPU, 0, 0, 0, 0, 0, // PC_VMULOUH
BPU, 0, 0, 0, 0, 0, // PC_VNOR
BPU, 0, 0, 0, 0, 0, // PC_VOR
BPU, 0, 0, 0, 0, 0, // PC_VPKPX
BPU, 0, 0, 0, 0, 0, // PC_VPKSHSS
BPU, 0, 0, 0, 0, 0, // PC_VPKSHUS
BPU, 0, 0, 0, 0, 0, // PC_VPKSWSS
BPU, 0, 0, 0, 0, 0, // PC_VPKSWUS
BPU, 0, 0, 0, 0, 0, // PC_VPKUHUM
BPU, 0, 0, 0, 0, 0, // PC_VPKUHUS
BPU, 0, 0, 0, 0, 0, // PC_VPKUWUM
BPU, 0, 0, 0, 0, 0, // PC_VPKUWUS
BPU, 0, 0, 0, 0, 0, // PC_VREFP
BPU, 0, 0, 0, 0, 0, // PC_VRFIM
BPU, 0, 0, 0, 0, 0, // PC_VRFIN
BPU, 0, 0, 0, 0, 0, // PC_VRFIP
BPU, 0, 0, 0, 0, 0, // PC_VRFIZ
BPU, 0, 0, 0, 0, 0, // PC_VRLB
BPU, 0, 0, 0, 0, 0, // PC_VRLH
BPU, 0, 0, 0, 0, 0, // PC_VRLW
BPU, 0, 0, 0, 0, 0, // PC_VRSQRTEFP
BPU, 0, 0, 0, 0, 0, // PC_VSL
BPU, 0, 0, 0, 0, 0, // PC_VSLB
BPU, 0, 0, 0, 0, 0, // PC_VSLH
BPU, 0, 0, 0, 0, 0, // PC_VSLO
BPU, 0, 0, 0, 0, 0, // PC_VSLW
BPU, 0, 0, 0, 0, 0, // PC_VSPLTB
BPU, 0, 0, 0, 0, 0, // PC_VSPLTH
BPU, 0, 0, 0, 0, 0, // PC_VSPLTW
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISB
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISH
BPU, 0, 0, 0, 0, 0, // PC_VSPLTISW
BPU, 0, 0, 0, 0, 0, // PC_VSR
BPU, 0, 0, 0, 0, 0, // PC_VSRAB
BPU, 0, 0, 0, 0, 0, // PC_VSRAH
BPU, 0, 0, 0, 0, 0, // PC_VSRAW
BPU, 0, 0, 0, 0, 0, // PC_VSRB
BPU, 0, 0, 0, 0, 0, // PC_VSRH
BPU, 0, 0, 0, 0, 0, // PC_VSRO
BPU, 0, 0, 0, 0, 0, // PC_VSRW
BPU, 0, 0, 0, 0, 0, // PC_VSUBCUW
BPU, 0, 0, 0, 0, 0, // PC_VSUBFP
BPU, 0, 0, 0, 0, 0, // PC_VSUBSBS
BPU, 0, 0, 0, 0, 0, // PC_VSUBSHS
BPU, 0, 0, 0, 0, 0, // PC_VSUBSWS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUBM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUBS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUHM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUHS
BPU, 0, 0, 0, 0, 0, // PC_VSUBUWM
BPU, 0, 0, 0, 0, 0, // PC_VSUBUWS
BPU, 0, 0, 0, 0, 0, // PC_VSUMSWS
BPU, 0, 0, 0, 0, 0, // PC_VSUM2SWS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4SBS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4SHS
BPU, 0, 0, 0, 0, 0, // PC_VSUM4UBS
BPU, 0, 0, 0, 0, 0, // PC_VUPKHPX
BPU, 0, 0, 0, 0, 0, // PC_VUPKHSB
BPU, 0, 0, 0, 0, 0, // PC_VUPKHSH
BPU, 0, 0, 0, 0, 0, // PC_VUPKLPX
BPU, 0, 0, 0, 0, 0, // PC_VUPKLSB
BPU, 0, 0, 0, 0, 0, // PC_VUPKLSH
BPU, 0, 0, 0, 0, 0, // PC_VXOR
BPU, 0, 0, 0, 0, 0, // PC_VMADDFP
BPU, 0, 0, 0, 0, 0, // PC_VMHADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VMHRADDSHS
BPU, 0, 0, 0, 0, 0, // PC_VMLADDUHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMMBM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMSHS
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUBM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHM
BPU, 0, 0, 0, 0, 0, // PC_VMSUMUHS
BPU, 0, 0, 0, 0, 0, // PC_VNMSUBFP
BPU, 0, 0, 0, 0, 0, // PC_VPERM
BPU, 0, 0, 0, 0, 0, // PC_VSEL
BPU, 0, 0, 0, 0, 0, // PC_VSLDOI
BPU, 0, 0, 0, 0, 0, // PC_VMR
BPU, 0, 0, 0, 0, 0, // PC_VMRP
BPU, 0, 0, 0, 0, 0, // PC_SLE
BPU, 0, 0, 0, 0, 0, // PC_SLEQ
BPU, 0, 0, 0, 0, 0, // PC_SLIQ
BPU, 0, 0, 0, 0, 0, // PC_SLLIQ
BPU, 0, 0, 0, 0, 0, // PC_SLLQ
BPU, 0, 0, 0, 0, 0, // PC_SLQ
BPU, 0, 0, 0, 0, 0, // PC_SRAIQ
BPU, 0, 0, 0, 0, 0, // PC_SRAQ
BPU, 0, 0, 0, 0, 0, // PC_SRE
BPU, 0, 0, 0, 0, 0, // PC_SREA
BPU, 0, 0, 0, 0, 0, // PC_SREQ
BPU, 0, 0, 0, 0, 0, // PC_SRIQ
BPU, 0, 0, 0, 0, 0, // PC_SRLIQ
BPU, 0, 0, 0, 0, 0, // PC_SRLQ
BPU, 0, 0, 0, 0, 0, // PC_SRQ
BPU, 0, 0, 0, 0, 0, // PC_MASKG
BPU, 0, 0, 0, 0, 0, // PC_MASKIR
BPU, 0, 0, 0, 0, 0, // PC_LSCBX
BPU, 0, 0, 0, 0, 0, // PC_DIV
BPU, 0, 0, 0, 0, 0, // PC_DIVS
BPU, 0, 0, 0, 0, 0, // PC_DOZ
BPU, 0, 0, 0, 0, 0, // PC_MUL
BPU, 0, 0, 0, 0, 0, // PC_NABS
BPU, 0, 0, 0, 0, 0, // PC_ABS
BPU, 0, 0, 0, 0, 0, // PC_CLCS
BPU, 0, 0, 0, 0, 0, // PC_DOZI
BPU, 0, 0, 0, 0, 0, // PC_RLMI
BPU, 0, 0, 0, 0, 0, // PC_RRIB
};
// Moves the instruction in `oldStage` into `newStage` and vacates `oldStage`.
//
// firstStage: first stage of the unit the instruction is travelling through;
//             (newStage - firstStage) selects which entry of the opcode's
//             cycles[] array becomes the new stage's remaining count.
// oldStage:   stage currently holding the instruction.
// newStage:   stage the instruction moves into.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline[oldStage].instr;
    int step = newStage - firstStage;

    pipeline[newStage].remaining = instruction_timing[moving->op].cycles[step];
    pipeline[newStage].instr = moving;
    pipeline[oldStage].instr = NULL;
}
// Claims the next free completion buffer slot for `instr`, marks it as not
// yet completed, and moves the free-slot index on around the circular buffer.
// The caller is expected to have checked completionbuffers.free beforehand
// (can_issue() does so).
static void assign_completion_buffer(PCode *instr) {
    unsigned int slot = completionbuffers.nextFreeSlot;

    completionbuffers.entries[slot].completed = 0;
    completionbuffers.entries[slot].instr = instr;
    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;

    // one fewer free entry, one more in use
    completionbuffers.free--;
    completionbuffers.used++;
}
// Marks the instruction occupying `stage` as completed in the completion
// buffers and vacates that pipeline stage.
//
// stage: index into pipeline[]. Every call in advance_clock() is guarded by a
//        check that the stage currently holds an instruction.
//
// Completions out of IU1 and IU2 are additionally remembered in
// iu1_completed_instruction / iu2_completed_instruction, which can_issue()
// consults when deciding whether a dependent integer instruction may issue.
static void complete_instruction(int stage) {
    PCode *instr = pipeline[stage].instr;
    int buf;

    // Look for the completion buffer entry tracking this instruction. If no
    // entry matches, nothing is marked: the previous code would then have
    // written to entries[MaxEntries], one element past the end of the array.
    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr) {
            completionbuffers.entries[buf].completed = 1;
            break;
        }
    }

    pipeline[stage].instr = NULL;

    if (stage == IU1)
        iu1_completed_instruction = instr;
    else if (stage == IU2)
        iu2_completed_instruction = instr;
}
// Releases the completion buffer entry at the retirement position and moves
// that position on to the next slot of the circular buffer.
static void retire_instruction(void) {
    unsigned int slot = completionbuffers.nextToRetire;

    completionbuffers.entries[slot].instr = NULL;
    completionbuffers.nextToRetire = (slot + 1) % MaxEntries;

    // one more free entry, one fewer in use
    completionbuffers.free++;
    completionbuffers.used--;
}
// Returns the latency, in cycles, that the scheduler should assume for `instr`.
//
// The base value comes from instruction_timing[]. Two adjustments are applied:
//   - PC_LMW / PC_STMW add (argCount - 2) cycles;
//   - instructions with fRecordBit set add 2 cycles.
static int latency(PCode *instr) {
    int total = instruction_timing[instr->op].latency;

    if (instr->op == PC_LMW || instr->op == PC_STMW)
        total += instr->argCount - 2;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        total += 2;

    return total;
}
// Resets the simulated machine to its idle state: no instruction in any
// pipeline stage, all completion buffer entries free, and no remembered
// integer-unit completions.
static void initialize(void) {
    int n;

    iu1_completed_instruction = NULL;
    iu2_completed_instruction = NULL;

    n = 0;
    while (n < NumStages) {
        pipeline[n].instr = NULL;
        n++;
    }

    n = 0;
    while (n < MaxEntries) {
        completionbuffers.entries[n].instr = NULL;
        n++;
    }

    completionbuffers.nextFreeSlot = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.used = 0;
    completionbuffers.free = MaxEntries;
}
// Reports whether `instr` could be issued in the current cycle.
//
// Returns 1 if it can issue, 0 otherwise. The checks are:
//   - a completion buffer entry must be free;
//   - IU2-class instructions may use either integer unit, but when one unit
//     is busy the new instruction must not depend (GPR) on the instruction in
//     it, nor on the instructions the integer units completed this cycle;
//   - everything else needs its own initial stage to be idle;
//   - a write cannot issue while another write occupies LSU2.
static int can_issue(PCode *instr) {
    int unit;

    if (!completionbuffers.free)
        return 0;

    unit = instruction_timing[instr->op].stage;

    if (unit == IU2) {
        PCode *busy;

        if (pipeline[IU1].instr) {
            if (pipeline[IU2].instr)
                return 0;               // both integer units occupied
            busy = pipeline[IU1].instr;
        } else {
            if (!pipeline[IU2].instr)
                return 1;               // both integer units idle
            busy = pipeline[IU2].instr;
        }

        if (is_dependent(instr, busy, RegClass_GPR) ||
            is_dependent(instr, iu1_completed_instruction, RegClass_GPR) ||
            is_dependent(instr, iu2_completed_instruction, RegClass_GPR))
            return 0;
    } else if (pipeline[unit].instr) {
        return 0;
    }

    if (instr->flags & fIsWrite) {
        PCode *inLsu2 = pipeline[LSU2].instr;

        if (inLsu2 && (inLsu2->flags & fIsWrite))
            return 0;
    }

    return 1;
}
// Issues `instr`: reserves a completion buffer entry for it and places it in
// its initial pipeline stage with the cycle count of that first stage.
// An IU2-class instruction is placed in IU1 instead whenever IU1 is idle.
static void issue(PCode *instr) {
    int firstCycles = instruction_timing[instr->op].cycles[0];
    int unit = instruction_timing[instr->op].stage;

    assign_completion_buffer(instr);

    if (unit == IU2) {
        if (!pipeline[IU1].instr)
            unit = IU1;
    }

    pipeline[unit].remaining = firstCycles;
    pipeline[unit].instr = instr;
}
// Advances the simulated machine by one clock cycle.
//
// The work is done in a fixed order:
//   1. forget the integer-unit completions recorded by the previous call;
//   2. count down every occupied stage that still has cycles remaining;
//   3. retire up to two completed instructions from the retirement position;
//   4. complete instructions whose last stage has finished;
//   5. move instructions along the multi-stage units (FPU, LSU), checking
//      each unit from its last stage back to its first.
// Because step 3 runs before step 4, an instruction completed by this call is
// retired by a later call at the earliest.
static void advance_clock(void) {
int stage;
iu1_completed_instruction = NULL;
iu2_completed_instruction = NULL;
// count down the stages that are still busy
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
// retire at most two instructions per cycle, in buffer order
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
}
}
// stages from which an instruction completes once its count reaches zero
if (pipeline[IU1].instr && pipeline[IU1].remaining == 0)
complete_instruction(IU1);
if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0)
complete_instruction(LSU2);
if (pipeline[FPU3].instr && pipeline[FPU3].remaining == 0)
complete_instruction(FPU3);
if (pipeline[SRU].instr && pipeline[SRU].remaining == 0)
complete_instruction(SRU);
if (pipeline[BPU].instr && pipeline[BPU].remaining == 0)
complete_instruction(BPU);
if (pipeline[IU2].instr && pipeline[IU2].remaining == 0)
complete_instruction(IU2);
// FDIV and FDIVS complete directly from FPU1 rather than moving on to FPU2
if (
pipeline[FPU1].instr &&
pipeline[FPU1].remaining == 0 &&
(pipeline[FPU1].instr->op == PC_FDIV || pipeline[FPU1].instr->op == PC_FDIVS)
)
complete_instruction(FPU1);
// move finished instructions into the next stage when that stage is empty;
// FPU2 is handled before FPU1, so the slot advance() vacates in FPU2 can be
// taken by the instruction leaving FPU1 in the same call
if (pipeline[FPU2].instr && pipeline[FPU2].remaining == 0 && !pipeline[FPU3].instr)
advance(FPU1, FPU2, FPU3);
if (pipeline[FPU1].instr && pipeline[FPU1].remaining == 0 && !pipeline[FPU2].instr)
advance(FPU1, FPU1, FPU2);
if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr)
advance(LSU1, LSU1, LSU2);
}
// Returns the `serializes` flag recorded for this instruction's opcode in
// instruction_timing[] (non-zero when the opcode is marked as serialising).
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
// Scheduler description exported by this file: three integer parameters
// followed by this file's callbacks, in the field order of MachineInfo.
// The last slot uses the shared default_uses_vpermute_unit rather than a
// function defined in this file.
// NOTE(review): the meaning of the three leading integers is defined by the
// MachineInfo declaration, which is not visible here - confirm before editing.
MachineInfo machine750 = {
    2, 1, 0,
    latency,
    initialize,
    can_issue,
    issue,
    advance_clock,
    serializes,
    default_uses_vpermute_unit
};

View File

@@ -0,0 +1,615 @@
#include "compiler/Scheduler.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://www.nxp.com/docs/en/user-guide/MPC821UM.pdf
// Pipeline stages tracked by this scheduler model; the values index pipeline[].
// Note that Stage7 is declared after NumStages: it is not counted by
// NumStages and therefore has no slot in pipeline[NumStages].
typedef enum Stage {
BranchUnit,
Stage1,
Stage2,
LSU1,
LSU2,
CRUnit,
NumStages,
Stage7
} Stage;
// Current contents of each pipeline stage, indexed by Stage
// (valid indices are 0 .. NumStages - 1).
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline[NumStages];
// Number of slots in the completion buffer queue (completionbuffers.entries).
enum {
MaxEntries = 6
};
// Completion queue: a circular array of MaxEntries slots, with separate
// indices for the next slot to hand out and the next slot to retire.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
// the instruction tracked by this slot
PCode *instr;
// completion flag for the instruction in this slot
int completed;
} entries[MaxEntries];
} completionbuffers;
// Per-opcode timing table, indexed by PCode::op.
// The initializer is flat (no inner braces); every source line is one row:
//   stage, latency, cycles[0], cycles[1], cycles[2], serializes
// The trailing comment on each row names the opcode for that position.
// NOTE(review): in several places the row data looks shifted relative to the
// opcode named in the comment (e.g. the PC_LBZ row is BranchUnit and the
// PC_ADD row is LSU1, while 33/18-cycle rows sit on PC_FSUB/PC_FSUBS rather
// than the divides). Rows are matched to opcodes purely by position, so
// verify the data against the opcode enum before relying on it.
static struct {
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[3];
// does this instruction serialise?
char serializes;
} instruction_timing[OPCODE_MAX] = {
BranchUnit, 0, 0, 0, 0, 0, // PC_B
BranchUnit, 0, 0, 0, 0, 0, // PC_BL
BranchUnit, 0, 0, 0, 0, 0, // PC_BC
BranchUnit, 0, 0, 0, 0, 0, // PC_BCLR
BranchUnit, 0, 0, 0, 0, 0, // PC_BCCTR
BranchUnit, 0, 0, 0, 0, 0, // PC_BT
BranchUnit, 0, 0, 0, 0, 0, // PC_BTLR
BranchUnit, 0, 0, 0, 0, 0, // PC_BTCTR
BranchUnit, 0, 0, 0, 0, 0, // PC_BF
BranchUnit, 0, 0, 0, 0, 0, // PC_BFLR
BranchUnit, 0, 0, 0, 0, 0, // PC_BFCTR
BranchUnit, 0, 0, 0, 0, 0, // PC_BDNZ
BranchUnit, 0, 0, 0, 0, 0, // PC_BDNZT
BranchUnit, 0, 0, 0, 0, 0, // PC_BDNZF
BranchUnit, 0, 0, 0, 0, 0, // PC_BDZ
BranchUnit, 0, 0, 0, 0, 0, // PC_BDZT
BranchUnit, 0, 0, 0, 0, 0, // PC_BDZF
BranchUnit, 0, 0, 0, 0, 0, // PC_BLR
BranchUnit, 0, 0, 0, 0, 0, // PC_BCTR
BranchUnit, 0, 0, 0, 0, 0, // PC_BCTRL
BranchUnit, 0, 0, 0, 0, 0, // PC_BLRL
BranchUnit, 0, 0, 0, 0, 0, // PC_LBZ
LSU1, 2, 1, 1, 0, 0, // PC_LBZU
LSU1, 2, 1, 1, 0, 0, // PC_LBZX
LSU1, 2, 1, 1, 0, 0, // PC_LBZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHZ
LSU1, 2, 1, 1, 0, 0, // PC_LHZU
LSU1, 2, 1, 1, 0, 0, // PC_LHZX
LSU1, 2, 1, 1, 0, 0, // PC_LHZUX
LSU1, 2, 1, 1, 0, 0, // PC_LHA
LSU1, 2, 1, 1, 0, 0, // PC_LHAU
LSU1, 2, 1, 1, 0, 0, // PC_LHAX
LSU1, 2, 1, 1, 0, 0, // PC_LHAUX
LSU1, 2, 1, 1, 0, 0, // PC_LHBRX
LSU1, 2, 1, 1, 0, 0, // PC_LWZ
LSU1, 2, 1, 1, 0, 0, // PC_LWZU
LSU1, 2, 1, 1, 0, 0, // PC_LWZX
LSU1, 2, 1, 1, 0, 0, // PC_LWZUX
LSU1, 2, 1, 1, 0, 0, // PC_LWBRX
LSU1, 2, 1, 1, 0, 0, // PC_LMW
LSU1, 2, 1, 1, 0, 0, // PC_STB
LSU1, 2, 1, 1, 0, 0, // PC_STBU
LSU1, 2, 1, 1, 0, 0, // PC_STBX
LSU1, 2, 1, 1, 0, 0, // PC_STBUX
LSU1, 2, 1, 1, 0, 0, // PC_STH
LSU1, 2, 1, 1, 0, 0, // PC_STHU
LSU1, 2, 1, 1, 0, 0, // PC_STHX
LSU1, 2, 1, 1, 0, 0, // PC_STHUX
LSU1, 2, 1, 1, 0, 0, // PC_STHBRX
LSU1, 2, 1, 1, 0, 0, // PC_STW
LSU1, 2, 1, 1, 0, 0, // PC_STWU
LSU1, 2, 1, 1, 0, 0, // PC_STWX
LSU1, 2, 1, 1, 0, 0, // PC_STWUX
LSU1, 2, 1, 1, 0, 0, // PC_STWBRX
LSU1, 2, 1, 1, 0, 0, // PC_STMW
LSU1, 2, 1, 1, 0, 0, // PC_DCBF
LSU1, 2, 1, 1, 0, 0, // PC_DCBST
LSU1, 2, 1, 1, 0, 0, // PC_DCBT
LSU1, 2, 1, 1, 0, 0, // PC_DCBTST
LSU1, 2, 1, 1, 0, 0, // PC_DCBZ
LSU1, 2, 1, 1, 0, 0, // PC_ADD
Stage1, 1, 1, 0, 0, 0, // PC_ADDC
Stage1, 1, 1, 0, 0, 0, // PC_ADDE
Stage1, 1, 1, 0, 0, 0, // PC_ADDI
Stage1, 1, 1, 0, 0, 0, // PC_ADDIC
Stage1, 1, 1, 0, 0, 0, // PC_ADDICR
Stage1, 1, 1, 0, 0, 0, // PC_ADDIS
Stage1, 1, 1, 0, 0, 0, // PC_ADDME
Stage1, 1, 1, 0, 0, 0, // PC_ADDZE
Stage1, 1, 1, 0, 0, 0, // PC_DIVW
Stage1, 37, 37, 0, 0, 0, // PC_DIVWU
Stage1, 37, 37, 0, 0, 0, // PC_MULHW
Stage1, 5, 5, 0, 0, 0, // PC_MULHWU
Stage1, 5, 5, 0, 0, 0, // PC_MULLI
Stage1, 3, 3, 0, 0, 0, // PC_MULLW
Stage1, 5, 5, 0, 0, 0, // PC_NEG
Stage1, 1, 1, 0, 0, 0, // PC_SUBF
Stage1, 1, 1, 0, 0, 0, // PC_SUBFC
Stage1, 1, 1, 0, 0, 0, // PC_SUBFE
Stage1, 1, 1, 0, 0, 0, // PC_SUBFIC
Stage1, 1, 1, 0, 0, 0, // PC_SUBFME
Stage1, 1, 1, 0, 0, 0, // PC_SUBFZE
Stage1, 1, 1, 0, 0, 0, // PC_CMPI
Stage1, 3, 1, 0, 0, 0, // PC_CMP
Stage1, 3, 1, 0, 0, 0, // PC_CMPLI
Stage1, 3, 1, 0, 0, 0, // PC_CMPL
Stage1, 3, 1, 0, 0, 0, // PC_ANDI
Stage1, 1, 1, 0, 0, 0, // PC_ANDIS
Stage1, 1, 1, 0, 0, 0, // PC_ORI
Stage1, 1, 1, 0, 0, 0, // PC_ORIS
Stage1, 1, 1, 0, 0, 0, // PC_XORI
Stage1, 1, 1, 0, 0, 0, // PC_XORIS
Stage1, 1, 1, 0, 0, 0, // PC_AND
Stage1, 1, 1, 0, 0, 0, // PC_OR
Stage1, 1, 1, 0, 0, 0, // PC_XOR
Stage1, 1, 1, 0, 0, 0, // PC_NAND
Stage1, 1, 1, 0, 0, 0, // PC_NOR
Stage1, 1, 1, 0, 0, 0, // PC_EQV
Stage1, 1, 1, 0, 0, 0, // PC_ANDC
Stage1, 1, 1, 0, 0, 0, // PC_ORC
Stage1, 1, 1, 0, 0, 0, // PC_EXTSB
Stage1, 1, 1, 0, 0, 0, // PC_EXTSH
Stage1, 1, 1, 0, 0, 0, // PC_CNTLZW
Stage1, 1, 1, 0, 0, 0, // PC_RLWINM
Stage1, 1, 1, 0, 0, 0, // PC_RLWNM
Stage1, 1, 1, 0, 0, 0, // PC_RLWIMI
Stage1, 1, 1, 0, 0, 0, // PC_SLW
Stage1, 1, 1, 0, 0, 0, // PC_SRW
Stage1, 1, 1, 0, 0, 0, // PC_SRAWI
Stage1, 1, 1, 0, 0, 0, // PC_SRAW
Stage1, 1, 1, 0, 0, 0, // PC_CRAND
CRUnit, 1, 1, 0, 0, 0, // PC_CRANDC
CRUnit, 1, 1, 0, 0, 0, // PC_CREQV
CRUnit, 1, 1, 0, 0, 0, // PC_CRNAND
CRUnit, 1, 1, 0, 0, 0, // PC_CRNOR
CRUnit, 1, 1, 0, 0, 0, // PC_CROR
CRUnit, 1, 1, 0, 0, 0, // PC_CRORC
CRUnit, 1, 1, 0, 0, 0, // PC_CRXOR
CRUnit, 1, 1, 0, 0, 0, // PC_MCRF
CRUnit, 1, 1, 0, 0, 0, // PC_MTXER
Stage1, 1, 1, 0, 0, 0, // PC_MTCTR
BranchUnit, 2, 2, 0, 0, 0, // PC_MTLR
BranchUnit, 2, 2, 0, 0, 0, // PC_MTCRF
Stage1, 1, 1, 0, 0, 0, // PC_MTMSR
Stage1, 1, 1, 0, 0, 0, // PC_MTSPR
Stage1, 1, 1, 0, 0, 0, // PC_MFMSR
Stage1, 1, 1, 0, 0, 0, // PC_MFSPR
Stage1, 1, 1, 0, 0, 0, // PC_MFXER
Stage7, 3, 1, 1, 1, 0, // PC_MFCTR
Stage7, 3, 1, 1, 1, 0, // PC_MFLR
Stage1, 1, 1, 0, 0, 0, // PC_MFCR
Stage1, 1, 1, 0, 0, 0, // PC_MFFS
Stage1, 1, 1, 0, 0, 0, // PC_MTFSF
Stage1, 1, 1, 0, 0, 0, // PC_EIEIO
Stage1, 1, 1, 0, 0, 0, // PC_ISYNC
Stage1, 1, 1, 0, 0, 0, // PC_SYNC
Stage1, 1, 1, 0, 0, 0, // PC_RFI
Stage1, 1, 1, 0, 0, 0, // PC_LI
LSU1, 2, 1, 1, 0, 0, // PC_LIS
LSU1, 2, 1, 1, 0, 0, // PC_MR
LSU1, 2, 1, 1, 0, 0, // PC_NOP
LSU1, 2, 1, 1, 0, 0, // PC_NOT
LSU1, 2, 1, 1, 0, 0, // PC_LFS
LSU1, 2, 1, 1, 0, 0, // PC_LFSU
LSU1, 2, 1, 1, 0, 0, // PC_LFSX
LSU1, 2, 1, 1, 0, 0, // PC_LFSUX
LSU1, 2, 1, 1, 0, 0, // PC_LFD
LSU1, 2, 1, 1, 0, 0, // PC_LFDU
LSU1, 2, 1, 1, 0, 0, // PC_LFDX
LSU1, 2, 1, 1, 0, 0, // PC_LFDUX
LSU1, 2, 1, 1, 0, 0, // PC_STFS
LSU1, 2, 1, 1, 0, 0, // PC_STFSU
LSU1, 2, 1, 1, 0, 0, // PC_STFSX
LSU1, 2, 1, 1, 0, 0, // PC_STFSUX
Stage7, 3, 1, 1, 1, 0, // PC_STFD
Stage7, 3, 1, 1, 1, 0, // PC_STFDU
Stage7, 3, 1, 1, 1, 0, // PC_STFDX
Stage7, 3, 1, 1, 1, 0, // PC_STFDUX
Stage7, 3, 1, 1, 1, 0, // PC_FMR
Stage7, 3, 1, 1, 1, 0, // PC_FABS
Stage7, 3, 1, 1, 1, 0, // PC_FNEG
Stage7, 3, 1, 1, 1, 0, // PC_FNABS
Stage7, 4, 2, 1, 1, 0, // PC_FADD
Stage7, 3, 1, 1, 1, 0, // PC_FADDS
Stage7, 33, 33, 0, 0, 0, // PC_FSUB
Stage7, 18, 18, 0, 0, 0, // PC_FSUBS
Stage7, 4, 2, 1, 1, 0, // PC_FMUL
Stage7, 3, 1, 1, 1, 0, // PC_FMULS
Stage7, 4, 2, 1, 1, 0, // PC_FDIV
Stage7, 3, 1, 1, 1, 0, // PC_FDIVS
Stage7, 4, 2, 1, 1, 0, // PC_FMADD
Stage7, 3, 1, 1, 1, 0, // PC_FMADDS
Stage7, 4, 2, 1, 1, 0, // PC_FMSUB
Stage7, 3, 1, 1, 1, 0, // PC_FMSUBS
Stage7, 18, 18, 0, 0, 0, // PC_FNMADD
Stage7, 3, 1, 1, 1, 0, // PC_FNMADDS
Stage7, 3, 1, 1, 1, 0, // PC_FNMSUB
Stage7, 3, 1, 1, 1, 0, // PC_FNMSUBS
Stage7, 3, 1, 1, 1, 0, // PC_FRES
Stage7, 3, 1, 1, 1, 0, // PC_FRSQRTE
Stage7, 5, 1, 1, 1, 0, // PC_FSEL
Stage7, 5, 1, 1, 1, 0, // PC_FRSP
LSU1, 1, 0, 0, 0, 0, // PC_FCTIW
LSU1, 1, 0, 0, 0, 0, // PC_FCTIWZ
LSU1, 1, 0, 0, 0, 0, // PC_FCMPU
LSU1, 1, 0, 0, 0, 0, // PC_FCMPO
LSU1, 1, 0, 0, 0, 0, // PC_LWARX
LSU1, 1, 0, 0, 0, 0, // PC_LSWI
LSU1, 1, 0, 0, 0, 0, // PC_LSWX
Stage1, 1, 0, 0, 0, 0, // PC_STFIWX
Stage1, 1, 0, 0, 0, 0, // PC_STSWI
Stage1, 1, 0, 0, 0, 0, // PC_STSWX
Stage1, 1, 0, 0, 0, 0, // PC_STWCX
Stage1, 1, 0, 0, 0, 0, // PC_ECIWX
Stage1, 1, 0, 0, 0, 0, // PC_ECOWX
Stage1, 1, 0, 0, 0, 0, // PC_DCBI
Stage1, 1, 0, 0, 0, 0, // PC_ICBI
Stage1, 1, 0, 0, 0, 0, // PC_MCRFS
Stage1, 1, 0, 0, 0, 0, // PC_MCRXR
Stage1, 1, 0, 0, 0, 0, // PC_MFTB
Stage1, 1, 0, 0, 0, 0, // PC_MFSR
Stage1, 1, 0, 0, 0, 0, // PC_MTSR
Stage1, 1, 0, 0, 0, 0, // PC_MFSRIN
Stage1, 1, 0, 0, 0, 0, // PC_MTSRIN
Stage1, 1, 0, 0, 0, 0, // PC_MTFSB0
Stage1, 1, 0, 0, 0, 0, // PC_MTFSB1
Stage1, 1, 0, 0, 0, 0, // PC_MTFSFI
Stage1, 1, 0, 0, 0, 1, // PC_SC
Stage1, 1, 0, 0, 0, 1, // PC_FSQRT
Stage1, 1, 0, 0, 0, 0, // PC_FSQRTS
Stage1, 1, 0, 0, 0, 0, // PC_TLBIA
Stage1, 1, 0, 0, 0, 0, // PC_TLBIE
Stage1, 1, 0, 0, 0, 0, // PC_TLBLD
Stage1, 1, 0, 0, 0, 0, // PC_TLBLI
Stage1, 1, 0, 0, 0, 0, // PC_TLBSYNC
Stage1, 1, 0, 0, 0, 0, // PC_TW
Stage1, 1, 0, 0, 0, 1, // PC_TRAP
Stage1, 1, 0, 0, 0, 1, // PC_TWI
Stage1, 1, 0, 0, 0, 1, // PC_OPWORD
Stage1, 1, 0, 0, 0, 1, // PC_MFROM
Stage1, 1, 0, 0, 0, 0, // PC_DSA
Stage1, 1, 0, 0, 0, 0, // PC_ESA
Stage1, 1, 0, 0, 0, 0, // PC_DCCCI
Stage1, 0, 0, 0, 0, 0, // PC_DCREAD
Stage1, 0, 0, 0, 0, 0, // PC_ICBT
Stage1, 0, 0, 0, 0, 0, // PC_ICCCI
Stage1, 0, 0, 0, 0, 0, // PC_ICREAD
Stage1, 0, 0, 0, 0, 0, // PC_RFCI
Stage1, 0, 0, 0, 0, 0, // PC_TLBRE
Stage1, 0, 0, 0, 0, 0, // PC_TLBSX
Stage1, 0, 0, 0, 0, 0, // PC_TLBWE
Stage1, 0, 0, 0, 0, 0, // PC_WRTEE
Stage1, 0, 0, 0, 0, 0, // PC_WRTEEI
Stage1, 0, 0, 0, 0, 0, // PC_MFDCR
Stage1, 0, 0, 0, 0, 0, // PC_MTDCR
Stage1, 0, 0, 0, 0, 0, // PC_DCBA
Stage1, 0, 0, 0, 0, 0, // PC_DSS
BranchUnit, 0, 0, 0, 0, 0, // PC_DSSALL
BranchUnit, 0, 0, 0, 0, 0, // PC_DST
BranchUnit, 0, 0, 0, 0, 0, // PC_DSTT
BranchUnit, 0, 0, 0, 0, 0, // PC_DSTST
BranchUnit, 0, 0, 0, 0, 0, // PC_DSTSTT
BranchUnit, 0, 0, 0, 0, 0, // PC_LVEBX
BranchUnit, 0, 0, 0, 0, 0, // PC_LVEHX
BranchUnit, 0, 0, 0, 0, 0, // PC_LVEWX
BranchUnit, 0, 0, 0, 0, 0, // PC_LVSL
BranchUnit, 0, 0, 0, 0, 0, // PC_LVSR
BranchUnit, 0, 0, 0, 0, 0, // PC_LVX
BranchUnit, 0, 0, 0, 0, 0, // PC_LVXL
BranchUnit, 0, 0, 0, 0, 0, // PC_STVEBX
BranchUnit, 0, 0, 0, 0, 0, // PC_STVEHX
BranchUnit, 0, 0, 0, 0, 0, // PC_STVEWX
BranchUnit, 0, 0, 0, 0, 0, // PC_STVX
BranchUnit, 0, 0, 0, 0, 0, // PC_STVXL
BranchUnit, 0, 0, 0, 0, 0, // PC_MFVSCR
BranchUnit, 0, 0, 0, 0, 0, // PC_MTVSCR
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDCUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDSBS
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDSHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDSWS
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUBM
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUBS
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUHM
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUWM
BranchUnit, 0, 0, 0, 0, 0, // PC_VADDUWS
BranchUnit, 0, 0, 0, 0, 0, // PC_VAND
BranchUnit, 0, 0, 0, 0, 0, // PC_VANDC
BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGSW
BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VAVGUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VCFSX
BranchUnit, 0, 0, 0, 0, 0, // PC_VCFUX
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPBFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPEQUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGEFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTSW
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VCMPGTUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VCTSXS
BranchUnit, 0, 0, 0, 0, 0, // PC_VCTUXS
BranchUnit, 0, 0, 0, 0, 0, // PC_VEXPTEFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VLOGEFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXSW
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMAXUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINSW
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMINUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGHB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGHH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGHW
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGLB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGLH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRGLW
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULESB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULESH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULEUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULEUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOUB
BranchUnit, 0, 0, 0, 0, 0, // PC_VMULOUH
BranchUnit, 0, 0, 0, 0, 0, // PC_VNOR
BranchUnit, 0, 0, 0, 0, 0, // PC_VOR
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKPX
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSHSS
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSHUS
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSWSS
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKSWUS
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUHUM
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUHUS
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUWUM
BranchUnit, 0, 0, 0, 0, 0, // PC_VPKUWUS
BranchUnit, 0, 0, 0, 0, 0, // PC_VREFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIM
BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIN
BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIP
BranchUnit, 0, 0, 0, 0, 0, // PC_VRFIZ
BranchUnit, 0, 0, 0, 0, 0, // PC_VRLB
BranchUnit, 0, 0, 0, 0, 0, // PC_VRLH
BranchUnit, 0, 0, 0, 0, 0, // PC_VRLW
BranchUnit, 0, 0, 0, 0, 0, // PC_VRSQRTEFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VSL
BranchUnit, 0, 0, 0, 0, 0, // PC_VSLB
BranchUnit, 0, 0, 0, 0, 0, // PC_VSLH
BranchUnit, 0, 0, 0, 0, 0, // PC_VSLO
BranchUnit, 0, 0, 0, 0, 0, // PC_VSLW
BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTB
BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTH
BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTW
BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTISB
BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTISH
BranchUnit, 0, 0, 0, 0, 0, // PC_VSPLTISW
BranchUnit, 0, 0, 0, 0, 0, // PC_VSR
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRAB
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRAH
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRAW
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRB
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRH
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRO
BranchUnit, 0, 0, 0, 0, 0, // PC_VSRW
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBCUW
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBSBS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBSHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBSWS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUBM
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUBS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUHM
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUWM
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUBUWS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUMSWS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM2SWS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM4SBS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM4SHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VSUM4UBS
BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKHPX
BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKHSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKHSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKLPX
BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKLSB
BranchUnit, 0, 0, 0, 0, 0, // PC_VUPKLSH
BranchUnit, 0, 0, 0, 0, 0, // PC_VXOR
BranchUnit, 0, 0, 0, 0, 0, // PC_VMADDFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VMHADDSHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VMHRADDSHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VMLADDUHM
BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMMBM
BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMSHM
BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMSHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMUBM
BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMUHM
BranchUnit, 0, 0, 0, 0, 0, // PC_VMSUMUHS
BranchUnit, 0, 0, 0, 0, 0, // PC_VNMSUBFP
BranchUnit, 0, 0, 0, 0, 0, // PC_VPERM
BranchUnit, 0, 0, 0, 0, 0, // PC_VSEL
BranchUnit, 0, 0, 0, 0, 0, // PC_VSLDOI
BranchUnit, 0, 0, 0, 0, 0, // PC_VMR
BranchUnit, 0, 0, 0, 0, 0, // PC_VMRP
BranchUnit, 0, 0, 0, 0, 0, // PC_SLE
BranchUnit, 0, 0, 0, 0, 0, // PC_SLEQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SLIQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SLLIQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SLLQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SLQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRAIQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRAQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRE
BranchUnit, 0, 0, 0, 0, 0, // PC_SREA
BranchUnit, 0, 0, 0, 0, 0, // PC_SREQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRIQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRLIQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRLQ
BranchUnit, 0, 0, 0, 0, 0, // PC_SRQ
BranchUnit, 0, 0, 0, 0, 0, // PC_MASKG
BranchUnit, 0, 0, 0, 0, 0, // PC_MASKIR
BranchUnit, 0, 0, 0, 0, 0, // PC_LSCBX
BranchUnit, 0, 0, 0, 0, 0, // PC_DIV
BranchUnit, 0, 0, 0, 0, 0, // PC_DIVS
BranchUnit, 0, 0, 0, 0, 0, // PC_DOZ
BranchUnit, 0, 0, 0, 0, 0, // PC_MUL
BranchUnit, 0, 0, 0, 0, 0, // PC_NABS
BranchUnit, 0, 0, 0, 0, 0, // PC_ABS
BranchUnit, 0, 0, 0, 0, 0, // PC_CLCS
BranchUnit, 0, 0, 0, 0, 0, // PC_DOZI
BranchUnit, 0, 0, 0, 0, 0, // PC_RLMI
BranchUnit, 0, 0, 0, 0, 0, // PC_RRIB
};
// Move the instruction occupying `oldStage` into `newStage`.
//   firstStage - first stage of the unit; (newStage - firstStage) selects the
//                entry of the opcode's cycles[] array used for the new stage
//   oldStage   - stage currently holding the instruction (emptied on return)
//   newStage   - stage that receives the instruction
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving = pipeline[oldStage].instr;
    int step = newStage - firstStage;

    pipeline[newStage].remaining = instruction_timing[moving->op].cycles[step];
    pipeline[newStage].instr = moving;
    // cleared last, exactly as before
    pipeline[oldStage].instr = NULL;
}
// Claim the completion-queue slot at nextFreeSlot for `instr`, mark it as not
// yet completed, and step nextFreeSlot around the ring.
// The caller is expected to have checked that a slot is free; no check is
// made here.
static void assign_completion_buffer(PCode *instr) {
    unsigned int slot = completionbuffers.nextFreeSlot;

    completionbuffers.entries[slot].instr = instr;
    completionbuffers.entries[slot].completed = 0;
    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;

    completionbuffers.free -= 1;
    completionbuffers.used += 1;
}
// Mark the instruction occupying pipeline stage `stage` as completed in the
// completion queue and free the stage.
//
// The queue is searched from slot 0 for the entry holding that instruction.
// If no entry matches, nothing is flagged: the previous code fell out of the
// search with buf == MaxEntries and then wrote entries[MaxEntries].completed,
// one element past the end of the array.
static void complete_instruction(int stage) {
    PCode *instr = pipeline[stage].instr;
    int buf;

    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr) {
            completionbuffers.entries[buf].completed = 1;
            break;
        }
    }

    // the stage is emptied whether or not a queue entry was found
    pipeline[stage].instr = NULL;
}
// Release the completion-queue slot at nextToRetire and step nextToRetire
// around the ring. The slot's `completed` flag is left as it is; it is reset
// when the slot is claimed again.
static void retire_instruction(void) {
    unsigned int head = completionbuffers.nextToRetire;

    completionbuffers.entries[head].instr = NULL;
    completionbuffers.nextToRetire = (head + 1) % MaxEntries;

    completionbuffers.free += 1;
    completionbuffers.used -= 1;
}
// Scheduler callback: number of cycles charged to `instr`.
// Starts from the table latency of the opcode, then
//   - adds 2 when PCODE_FLAG_SET_F(instr) has fRecordBit set, and
//   - adds (argCount - 2) for PC_LMW / PC_STMW
//     (presumably one cycle per extra register moved - confirm).
static int latency(PCode *instr) {
    int total = instruction_timing[instr->op].latency;
    int isMultiple = (instr->op == PC_LMW || instr->op == PC_STMW);

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        total += 2;

    if (isMultiple)
        total += instr->argCount - 2;

    return total;
}
// Scheduler callback: reset the model to an empty machine - every pipeline
// stage vacant and the completion queue empty with all MaxEntries slots free.
// Only the `instr` pointers are cleared; `remaining` and `completed` are
// rewritten when a stage or slot is next used.
static void initialize(void) {
    int idx;

    for (idx = 0; idx != NumStages; ++idx)
        pipeline[idx].instr = NULL;

    completionbuffers.nextFreeSlot = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.used = 0;
    completionbuffers.free = MaxEntries;

    for (idx = 0; idx != MaxEntries; ++idx)
        completionbuffers.entries[idx].instr = NULL;
}
// Scheduler callback: returns 1 when `instr` may be issued this cycle, else 0.
// Issue is refused when
//   - the completion queue has no free slot,
//   - the stage the opcode starts in is still occupied, or
//   - `instr` has fIsWrite set while LSU2 holds an instruction that also has
//     fIsWrite set.
// NOTE(review): the starting stage comes straight from instruction_timing and
// is used as a pipeline[] index; rows that name Stage7 lie outside that array -
// confirm such opcodes never get here.
static int can_issue(PCode *instr) {
    int unit;
    PCode *inLSU2;

    if (!completionbuffers.free)
        return 0;

    unit = instruction_timing[instr->op].stage;
    if (pipeline[unit].instr != NULL)
        return 0;

    if (instr->flags & fIsWrite) {
        inLSU2 = pipeline[LSU2].instr;
        if (inLSU2 != NULL && (inLSU2->flags & fIsWrite))
            return 0;
    }

    return 1;
}
// Scheduler callback: issue `instr`. Claims a completion-queue slot for it and
// places it in the starting stage given by the timing table, with the stage
// countdown set to cycles[0] for the opcode.
static void issue(PCode *instr) {
    int unit = instruction_timing[instr->op].stage;

    assign_completion_buffer(instr);

    pipeline[unit].remaining = instruction_timing[instr->op].cycles[0];
    pipeline[unit].instr = instr;
}
static void advance_clock(void) {
int stage;
for (stage = 0; stage < NumStages; stage++) {
if (pipeline[stage].instr && pipeline[stage].remaining)
--pipeline[stage].remaining;
}
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
if (completionbuffers.used && completionbuffers.entries[completionbuffers.nextToRetire].completed) {
retire_instruction();
}
}
if (pipeline[Stage1].instr && pipeline[Stage1].remaining == 0)
complete_instruction(Stage1);
if (pipeline[LSU2].instr && pipeline[LSU2].remaining == 0)
complete_instruction(LSU2);
if (pipeline[BranchUnit].instr && pipeline[BranchUnit].remaining == 0)
complete_instruction(BranchUnit);
if (pipeline[LSU1].instr && pipeline[LSU1].remaining == 0 && !pipeline[LSU2].instr)
advance(LSU1, LSU1, LSU2);
}
// Scheduler callback: report the `serializes` flag that the timing table
// stores for the opcode of `instr` (non-zero means the opcode is marked as
// serialising).
static int serializes(PCode *instr) {
    int flag = instruction_timing[instr->op].serializes;

    return flag;
}
// Scheduler description exported for this machine model: three numeric
// parameters followed by pointers to the callbacks defined in this file
// (plus the shared default_uses_vpermute_unit).
// NOTE(review): the meaning of the leading 1, 0, 0 is fixed by the MachineInfo
// declaration, which is not visible here - confirm against the header that
// declares it.
MachineInfo machine821 = {
1,
0,
0,
&latency,
&initialize,
&can_issue,
&issue,
&advance_clock,
&serializes,
&default_uses_vpermute_unit
};

View File

@@ -0,0 +1,752 @@
#include "compiler/Scheduler.h"
#include "compiler/CError.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
// https://www.nxp.com/docs/en/reference-manual/MPC7450UM.pdf
// Pipeline stages modelled for this machine. Units with more than one stage
// get one enumerator per stage (suffix _1, _2, ...); NumStages is the count
// and sizes pipeline_altivec[].
typedef enum Stage {
BPU, // Branch Processing Unit
IU2_1, // Multiple-Cycle Integer Unit
IU2_2,
IU2_3,
IU1a, // Single-Cycle Integer Unit
IU1b, // Single-Cycle Integer Unit
IU1c, // Single-Cycle Integer Unit
LSU_1, // Load/Store Unit
LSU_2,
LSU_3,
LSU_4,
FPU_1, // Floating-Point Unit
FPU_2,
FPU_3,
FPU_4,
VIU1, // Vector Simple Integer Unit
VPU_1, // Vector Permute Unit
VPU_2,
VIU2_1, // Vector Complex Integer Unit
VIU2_2,
VIU2_3,
VIU2_4,
VFPU_1, // Vector Floating-Point Unit
VFPU_2,
VFPU_3,
VFPU_4,
NumStages
} Stage;
// Occupancy of every modelled pipeline stage; one entry per Stage value
// below NumStages.
static struct {
// the instruction currently in this pipeline stage
PCode *instr;
// how many cycles are left for this instruction to finish
int remaining;
} pipeline_altivec[NumStages];
// Identifiers for the eight queue slots; NumQueues is the count.
enum {
Queue0,
Queue1,
Queue2,
Queue3,
Queue4,
Queue5,
Queue6,
Queue7,
NumQueues
};
// One int of state per queue slot.
// NOTE(review): how the values are used lies outside this excerpt - check the
// functions further down the file.
static int fetchqueues[NumQueues];
// Capacity of the completion queue below.
enum {
MaxEntries = 16
};
// Completion queue: a ring of MaxEntries slots tracking instructions from
// dispatch until they are retired.
static struct {
// how many entries remain unused in the queue
unsigned int free;
// how many entries are currently used in the queue
unsigned int used;
// the index of the next instruction that will be retired
unsigned int nextToRetire;
// the index of the next free slot that will be used when an instruction is dispatched
unsigned int nextFreeSlot;
// circular array of entries in the completion queue
struct {
PCode *instr;
int completed;
} entries[MaxEntries];
} completionbuffers;
static struct {
short index;
// the initial stage for this instruction
Stage stage;
// the total amount of cycles required by this instruction
char latency;
// how long it takes to finish each stage
char cycles[4];
// does this instruction serialise?
char serializes;
char unused;
} instruction_timing[] = {
0, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_B
1, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BL
2, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BC
3, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCLR
4, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCCTR
5, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BT
6, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTLR
7, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BTCTR
8, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BF
9, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFLR
10, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BFCTR
11, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZ
12, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZT
13, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDNZF
14, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZ
15, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZT
16, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BDZF
17, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLR
18, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTR
19, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BCTRL
20, BPU, 0, 0, 0, 0, 0, 0, 0, // PC_BLRL
21, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZ
22, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZU
23, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZX
24, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LBZUX
25, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZ
26, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZU
27, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZX
28, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHZUX
29, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHA
30, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAU
31, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAX
32, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHAUX
33, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LHBRX
34, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZ
35, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZU
36, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZX
37, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWZUX
38, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LWBRX
39, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_LMW
40, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STB
41, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBU
42, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBX
43, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STBUX
44, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STH
45, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHU
46, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHX
47, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHUX
48, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STHBRX
49, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STW
50, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWU
51, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWX
52, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWUX
53, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STWBRX
54, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STMW
55, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBF
56, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBST
57, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBT
58, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBTST
59, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBZ
60, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADD
61, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDC
62, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDE
63, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDI
64, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIC
65, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDICR
66, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDIS
67, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDME
68, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ADDZE
69, IU2_1, 23, 23, 0, 0, 0, 0, 0, // PC_DIVW
70, IU2_1, 23, 23, 0, 0, 0, 0, 0, // PC_DIVWU
71, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULHW
72, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULHWU
73, IU2_1, 3, 1, 1, 1, 0, 0, 0, // PC_MULLI
74, IU2_1, 4, 2, 2, 0, 0, 0, 0, // PC_MULLW
75, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NEG
76, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBF
77, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFC
78, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFE
79, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFIC
80, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFME
81, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SUBFZE
82, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPI
83, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMP
84, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPLI
85, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CMPL
86, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDI
87, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDIS
88, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORI
89, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORIS
90, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XORI
91, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XORIS
92, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_AND
93, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_OR
94, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_XOR
95, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NAND
96, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOR
97, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EQV
98, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ANDC
99, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ORC
100, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSB
101, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_EXTSH
102, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_CNTLZW
103, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWINM
104, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWNM
105, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_RLWIMI
106, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SLW
107, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_SRW
108, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_SRAWI
109, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_SRAW
110, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRAND
111, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRANDC
112, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CREQV
113, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRNAND
114, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRNOR
115, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CROR
116, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRORC
117, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_CRXOR
118, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MCRF
119, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTXER
120, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTCTR
121, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTLR
122, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MTCRF
123, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTMSR
124, IU2_1, 3, 3, 0, 0, 0, 1, 0, // PC_MTSPR
125, IU2_1, 3, 2, 1, 0, 0, 0, 0, // PC_MFMSR
126, IU2_1, 3, 3, 0, 0, 0, 1, 0, // PC_MFSPR
127, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFXER
128, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFCTR
129, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_MFLR
130, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MFCR
131, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MFFS
132, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSF
133, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_EIEIO
134, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_ISYNC
135, LSU_1, 35, 35, 0, 0, 0, 1, 0, // PC_SYNC
136, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_RFI
137, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_LI
138, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_LIS
139, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MR
140, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOP
141, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_NOT
142, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFS
143, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSU
144, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSX
145, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFSUX
146, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFD
147, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDU
148, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDX
149, LSU_1, 4, 1, 1, 1, 1, 0, 0, // PC_LFDUX
150, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFS
151, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSU
152, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSX
153, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFSUX
154, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFD
155, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDU
156, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDX
157, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STFDUX
158, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMR
159, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FABS
160, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNEG
161, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNABS
162, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FADD
163, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FADDS
164, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSUB
165, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSUBS
166, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMUL
167, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMULS
168, FPU_1, 35, 35, 0, 0, 0, 0, 0, // PC_FDIV
169, FPU_1, 21, 21, 0, 0, 0, 0, 0, // PC_FDIVS
170, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMADD
171, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMADDS
172, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMSUB
173, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FMSUBS
174, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMADD
175, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMADDS
176, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMSUB
177, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FNMSUBS
178, FPU_1, 14, 14, 0, 0, 0, 0, 0, // PC_FRES
179, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FRSQRTE
180, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FSEL
181, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FRSP
182, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCTIW
183, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCTIWZ
184, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCMPU
185, FPU_1, 5, 1, 1, 1, 2, 0, 0, // PC_FCMPO
186, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_LWARX
187, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LSWI
188, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_LSWX
189, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STFIWX
190, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STSWI
191, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_STSWX
192, LSU_1, 3, 1, 1, 1, 0, 1, 0, // PC_STWCX
193, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ECIWX
194, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ECOWX
195, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_DCBI
196, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_ICBI
197, IU2_1, 5, 5, 0, 0, 0, 1, 0, // PC_MCRFS
198, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MCRXR
199, IU2_1, 5, 5, 0, 0, 0, 0, 0, // PC_MFTB
200, IU2_1, 4, 1, 3, 0, 0, 0, 0, // PC_MFSR
201, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTSR
202, IU2_1, 4, 1, 3, 0, 0, 0, 0, // PC_MFSRIN
203, IU2_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTSRIN
204, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSB0
205, FPU_1, 5, 5, 0, 0, 0, 1, 0, // PC_MTFSB1
206, FPU_1, 5, 5, 0, 0, 0, 0, 0, // PC_MTFSFI
207, IU2_1, 1, 1, 0, 0, 0, 1, 0, // PC_SC
208, FPU_1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRT
209, FPU_1, 1, 1, 0, 0, 0, 0, 0, // PC_FSQRTS
210, LSU_1, 1, 1, 0, 0, 0, 0, 0, // PC_TLBIA
211, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_TLBIE
212, LSU_1, 3, 3, 0, 0, 0, 0, 0, // PC_TLBLD
213, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_TLBLI
214, LSU_1, 3, 3, 0, 0, 0, 1, 0, // PC_TLBSYNC
215, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_TW
216, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_TRAP
217, IU1a, 2, 2, 0, 0, 0, 0, 0, // PC_TWI
218, IU1a, 1, 1, 0, 0, 0, 1, 0, // PC_OPWORD
219, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_MFROM
220, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_DSA
221, IU1a, 1, 1, 0, 0, 0, 0, 0, // PC_ESA
222, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_DCCCI
223, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_DCREAD
224, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICBT
225, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICCCI
226, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_ICREAD
227, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_RFCI
228, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBRE
229, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBSX
230, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_TLBWE
231, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEE
232, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_WRTEEI
233, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_MFDCR
234, IU1a, 1, 0, 0, 0, 0, 0, 0, // PC_MTDCR
235, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DCBA
236, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSS
237, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSSALL
238, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DST
239, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTT
240, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTST
241, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_DSTSTT
242, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEBX
243, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEHX
244, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVEWX
245, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVSL
246, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVSR
247, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVX
248, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_LVXL
249, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEBX
250, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEHX
251, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVEWX
252, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVX
253, LSU_1, 3, 1, 1, 1, 0, 0, 0, // PC_STVXL
254, VFPU_1, 2, 2, 0, 0, 0, 1, 0, // PC_MFVSCR
255, VFPU_1, 2, 2, 0, 0, 0, 1, 0, // PC_MTVSCR
256, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDCUW
257, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VADDFP
258, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSBS
259, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSHS
260, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDSWS
261, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBM
262, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUBS
263, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHM
264, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUHS
265, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWM
266, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VADDUWS
267, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAND
268, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VANDC
269, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSB
270, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSH
271, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGSW
272, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUB
273, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUH
274, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VAVGUW
275, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFSX
276, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCFUX
277, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPBFP
278, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPEQFP
279, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUB
280, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUH
281, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPEQUW
282, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPGEFP
283, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VCMPGTFP
284, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSB
285, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSH
286, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTSW
287, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUB
288, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUH
289, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VCMPGTUW
290, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTSXS
291, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VCTUXS
292, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VEXPTEFP
293, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VLOGEFP
294, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMAXFP
295, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSB
296, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSH
297, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXSW
298, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUB
299, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUH
300, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMAXUW
301, VFPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMINFP
302, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSB
303, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSH
304, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINSW
305, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUB
306, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUH
307, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMINUW
308, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHB
309, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHH
310, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGHW
311, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLB
312, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLH
313, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRGLW
314, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULESB
315, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULESH
316, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULEUB
317, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULEUH
318, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOSB
319, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOSH
320, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOUB
321, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMULOUH
322, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VNOR
323, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VOR
324, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKPX
325, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSHSS
326, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSHUS
327, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSWSS
328, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKSWUS
329, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUHUM
330, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUHUS
331, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUWUM
332, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPKUWUS
333, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VREFP
334, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIM
335, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIN
336, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIP
337, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRFIZ
338, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLB
339, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLH
340, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VRLW
341, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VRSQRTEFP
342, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSL
343, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLB
344, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLH
345, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSLO
346, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSLW
347, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTB
348, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTH
349, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTW
350, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISB
351, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISH
352, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSPLTISW
353, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSR
354, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAB
355, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAH
356, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRAW
357, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRB
358, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRH
359, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSRO
360, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSRW
361, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBCUW
362, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUBFP
363, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSBS
364, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSHS
365, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBSWS
366, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBM
367, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUBS
368, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHM
369, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUHS
370, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWM
371, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSUBUWS
372, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUMSWS
373, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM2SWS
374, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4SBS
375, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4SHS
376, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VSUM4UBS
377, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHPX
378, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHSB
379, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKHSH
380, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLPX
381, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLSB
382, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VUPKLSH
383, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VXOR
384, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMADDFP
385, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMHADDSHS
386, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMHRADDSHS
387, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMLADDUHM
388, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMMBM
389, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMSHM
390, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMSHS
391, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUBM
392, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUHM
393, VIU2_1, 4, 1, 1, 1, 1, 0, 0, // PC_VMSUMUHS
394, VFPU_1, 4, 1, 1, 1, 1, 0, 0, // PC_VNMSUBFP
395, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VPERM
396, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VSEL
397, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VSLDOI
398, VIU1, 1, 1, 0, 0, 0, 0, 0, // PC_VMR
399, VPU_1, 2, 1, 1, 0, 0, 0, 0, // PC_VMRP
-1, IU2_1, 1, 1, 0, 0, 0, 1, 0
};
// Number of pipeline_units[] entries that advance_clock() iterates over.
enum { NumPipelineUnits = 6 };
// First and last stage of each multi-stage execution unit. advance_clock()
// walks every stage from `start` up to (but not including) `end` and moves a
// finished instruction into the following stage when that stage is empty.
// NOTE(review): the array is declared with 8 slots but only the first
// NumPipelineUnits (6) are initialized and read; confirm the extra slots are
// intentional.
static struct {
Stage start, end;
} pipeline_units[8] = {
IU2_1, IU2_3,
LSU_1, LSU_4,
FPU_1, FPU_4,
VPU_1, VPU_2,
VIU2_1, VIU2_4,
VFPU_1, VFPU_4
};
// Number of finalstages[] entries that advance_clock() iterates over.
enum { NumFinalStages = 11 };
// Stages from which an instruction is completed: advance_clock() calls
// complete_instruction() for each of these stages that holds an instruction
// whose remaining cycle count has reached zero.
// NOTE(review): the array is declared with 16 slots but only the first
// NumFinalStages (11) are initialized and read.
static Stage finalstages[16] = {
BPU, IU2_3, IU1a, IU1b,
IU1c, LSU_4, FPU_4, VIU1,
VPU_2, VIU2_4, VFPU_4
};
// Forward declaration: advance() calls complete_instruction(), which is
// defined further down.
static void complete_instruction(int stage);
// Move the instruction held in `oldStage` into `newStage`.
//   firstStage - first stage of the unit; the per-stage cycle count is looked
//                up at index (newStage - firstStage) in the timing table
//   oldStage   - stage currently holding the instruction (cleared on return)
//   newStage   - stage that receives the instruction
// If the new stage costs zero cycles the instruction is completed at once.
static void advance(int firstStage, int oldStage, int newStage) {
    PCode *moving;
    int stageCycles;

    moving = pipeline_altivec[oldStage].instr;
    stageCycles = instruction_timing[moving->op].cycles[newStage - firstStage];

    // occupy the destination stage first, then vacate the source stage
    pipeline_altivec[newStage].instr = moving;
    pipeline_altivec[newStage].remaining = stageCycles;

    pipeline_altivec[oldStage].instr = NULL;
    pipeline_altivec[oldStage].remaining = 0;

    if (!stageCycles)
        complete_instruction(newStage);
}
// Claim the next free completion-buffer entry for `instr`, mark it as not yet
// completed, and step the free-slot cursor forward (wrapping at MaxEntries).
static void assign_completion_buffer(PCode *instr) {
    int slot;

    completionbuffers.used++;
    completionbuffers.free--;

    slot = completionbuffers.nextFreeSlot;
    completionbuffers.entries[slot].instr = instr;
    completionbuffers.entries[slot].completed = 0;

    completionbuffers.nextFreeSlot = (slot + 1) % MaxEntries;
}
// Mark the instruction occupying `stage` as completed in the completion
// buffer and free the pipeline stage.
//
// The completion buffer is searched linearly for the entry that holds the
// instruction. If no entry matches, nothing is marked: the previous code
// would fall out of the search with buf == MaxEntries and then write
// entries[MaxEntries].completed, one element past the end of the array.
static void complete_instruction(int stage) {
    PCode *instr = pipeline_altivec[stage].instr;
    int buf;

    for (buf = 0; buf < MaxEntries; buf++) {
        if (completionbuffers.entries[buf].instr == instr) {
            completionbuffers.entries[buf].completed = 1;
            break;
        }
    }

    pipeline_altivec[stage].instr = NULL;
}
// Release the completion-buffer entry at the retire cursor and step the
// cursor forward (wrapping at MaxEntries).
static void retire_instruction(void) {
    int slot = completionbuffers.nextToRetire;

    completionbuffers.entries[slot].instr = NULL;
    completionbuffers.entries[slot].completed = 0;

    completionbuffers.used--;
    completionbuffers.free++;

    completionbuffers.nextToRetire = (slot + 1) % MaxEntries;
}
// Return the latency, in cycles, charged for `instr`:
//   - the base latency from the timing table,
//   - plus one when the instruction has fRecordBit set,
//   - plus (argCount - 2) for PC_LMW / PC_STMW.
static int latency(PCode *instr) {
    int total = instruction_timing[instr->op].latency;

    if (PCODE_FLAG_SET_F(instr) & fRecordBit)
        total++;

    switch (instr->op) {
        case PC_LMW:
        case PC_STMW:
            total += instr->argCount - 2;
            break;
        default:
            break;
    }

    return total;
}
// Reset the simulated machine: fetch queue counters, every pipeline stage and
// the whole completion buffer.
static void initialize(void) {
    int q;
    int s;
    int entry;

    // Queue0 starts at 1, every queue from index 1 upward starts empty
    fetchqueues[Queue0] = 1;
    q = 1;
    while (q < NumQueues) {
        fetchqueues[q] = 0;
        q++;
    }

    s = 0;
    while (s < NumStages) {
        pipeline_altivec[s].instr = NULL;
        s++;
    }

    completionbuffers.free = MaxEntries;
    completionbuffers.used = 0;
    completionbuffers.nextToRetire = 0;
    completionbuffers.nextFreeSlot = 0;

    entry = 0;
    while (entry < MaxEntries) {
        completionbuffers.entries[entry].instr = NULL;
        completionbuffers.entries[entry].completed = 0;
        entry++;
    }
}
// Report whether `instr` could be issued in the current simulated cycle.
// Returns 1 when it can, 0 otherwise. The instruction is rejected when:
//   - no completion-buffer entry is free,
//   - the stage it starts in is occupied (for IU1 instructions: when all of
//     IU1a, IU1b and IU1c are occupied),
//   - fetchqueues[Queue1] is empty,
//   - the queue counters for its class are exhausted or already at their
//     per-cycle limit (FPU: Queue2/Queue5, vector units: Queue4/Queue7,
//     everything else except BPU: Queue3/Queue6).
//
// NOTE(review): the previous version only looked at IU1a and IU1b, although
// issue() falls back to IU1c and finalstages[] completes instructions from
// IU1c. That made the IU1c path in issue() unreachable. IU1c is now taken
// into account here; confirm this matches the intended machine model.
static int can_issue(PCode *instr) {
    int stage;

    if (completionbuffers.free == 0)
        return 0;

    stage = instruction_timing[instr->op].stage;

    if (stage == IU1a) {
        // any of the three IU1 slots will do
        if (
            pipeline_altivec[IU1a].instr &&
            pipeline_altivec[IU1b].instr &&
            pipeline_altivec[IU1c].instr
        )
            return 0;
    } else {
        if (pipeline_altivec[stage].instr)
            return 0;
    }

    if (fetchqueues[Queue1] <= 0)
        return 0;

    if (stage == FPU_1) {
        if (fetchqueues[Queue2] < 1 || fetchqueues[Queue5] >= 1)
            return 0;
    } else if (stage >= VIU1 && stage <= VFPU_1) {
        if (fetchqueues[Queue4] < 1 || fetchqueues[Queue7] >= 2)
            return 0;
    } else if (stage != BPU) {
        if (fetchqueues[Queue3] < 1 || fetchqueues[Queue6] >= 3)
            return 0;
    }

    return 1;
}
// Issue `instr` into the simulated machine: reserve a completion-buffer
// entry, update the fetch queue counters and place the instruction in the
// first stage of its execution unit.
//
// The decrement of fetchqueues[Queue1] used to live inside the CError_ASSERT
// argument; it is now a separate statement so the state change does not
// depend on how (or whether) the assertion macro evaluates its argument.
static void issue(PCode *instr) {
    int stage = instruction_timing[instr->op].stage;
    int cycles = instruction_timing[instr->op].cycles[0];

    assign_completion_buffer(instr);

    fetchqueues[Queue1]--;
    CError_ASSERT(879, fetchqueues[Queue1] >= 0);

    // consume one entry of the class queue and count it against the
    // per-cycle counter that can_issue() checks
    if (stage == FPU_1) {
        fetchqueues[Queue2]--;
        fetchqueues[Queue5]++;
    } else if (stage >= VIU1 && stage <= VFPU_1) {
        fetchqueues[Queue4]--;
        fetchqueues[Queue7]++;
    } else if (stage != BPU) {
        fetchqueues[Queue3]--;
        fetchqueues[Queue6]++;
    }

    // none of the class queues may exceed Queue1
    fetchqueues[Queue2] = (fetchqueues[Queue1] < fetchqueues[Queue2]) ? fetchqueues[Queue1] : fetchqueues[Queue2];
    fetchqueues[Queue3] = (fetchqueues[Queue1] < fetchqueues[Queue3]) ? fetchqueues[Queue1] : fetchqueues[Queue3];
    fetchqueues[Queue4] = (fetchqueues[Queue1] < fetchqueues[Queue4]) ? fetchqueues[Queue1] : fetchqueues[Queue4];

    // IU1 instructions take the first free slot among IU1a, IU1b, IU1c;
    // when none is free the stage stays IU1a
    if (stage == IU1a) {
        if (!pipeline_altivec[IU1a].instr)
            stage = IU1a;
        else if (!pipeline_altivec[IU1b].instr)
            stage = IU1b;
        else if (!pipeline_altivec[IU1c].instr)
            stage = IU1c;
    }

    pipeline_altivec[stage].instr = instr;
    pipeline_altivec[stage].remaining = cycles;
}
// Simulate one clock tick of the machine model. In order:
//   1. count down the remaining cycles of every occupied stage,
//   2. retire up to three completed instructions, in buffer order,
//   3. complete instructions that finished a stage listed in finalstages[],
//   4. move finished instructions to the next stage of their unit when that
//      stage is empty,
//   5. reset the per-cycle counters and refill the fetch queue counters.
static void advance_clock(void) {
int num;
int i;
unsigned int unit;
// 1. one cycle elapses in every stage that still has cycles left
for (i = 0; i < NumStages; i++) {
if (pipeline_altivec[i].instr && pipeline_altivec[i].remaining)
--pipeline_altivec[i].remaining;
}
// 2. retire at most three entries; stop at the first entry that has not
//    completed yet, so retirement follows buffer order
for (i = 0; i < 3; i++) {
if (completionbuffers.used == 0)
break;
if (completionbuffers.entries[completionbuffers.nextToRetire].completed == 0)
break;
retire_instruction();
}
// 3. instructions that are done with a final stage leave the pipeline
unit = 0;
do {
if (pipeline_altivec[finalstages[unit]].instr && pipeline_altivec[finalstages[unit]].remaining == 0)
complete_instruction(finalstages[unit]);
} while (++unit < NumFinalStages);
// 4. within each multi-stage unit, shift finished instructions forward
unit = 0;
do {
Stage first;
Stage current;
first = pipeline_units[unit].start;
for (current = first; current < pipeline_units[unit].end; current++) {
if (pipeline_altivec[current].instr && pipeline_altivec[current].remaining == 0 && !pipeline_altivec[current + 1].instr)
advance(first, current, current + 1);
}
} while (++unit < NumPipelineUnits);
// 5. Queue5..Queue7 are the counters issue() increments and can_issue()
//    compares against fixed limits; they restart from zero each cycle
fetchqueues[Queue5] = 0;
fetchqueues[Queue6] = 0;
fetchqueues[Queue7] = 0;
// NOTE(review): this macro is defined inside the function and never
// #undef'd, so it stays visible for the rest of the translation unit.
#define CHEAP_MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
// num = how many entries to move from Queue0 to Queue1 this cycle: the room
// left below the caps 2/6/4 of Queue2/Queue3/Queue4, limited to 3, to the
// number of free completion-buffer entries and to what Queue0 holds
num = 2 - fetchqueues[Queue2];
num += 6 - fetchqueues[Queue3];
num += 4 - fetchqueues[Queue4];
num = (num > 3) ? 3 : num;
num = (completionbuffers.free < num) ? completionbuffers.free : num;
if (fetchqueues[Queue0] < num)
num = fetchqueues[Queue0];
fetchqueues[Queue1] += num;
fetchqueues[Queue0] -= num;
// each class queue grows by num, capped at its limit and at Queue1
fetchqueues[Queue2] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(2, fetchqueues[Queue2] + num));
fetchqueues[Queue3] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(6, fetchqueues[Queue3] + num));
fetchqueues[Queue4] = CHEAP_MIN(fetchqueues[Queue1], CHEAP_MIN(4, fetchqueues[Queue4] + num));
CError_ASSERT(991, fetchqueues[Queue1] <= (fetchqueues[Queue2] + fetchqueues[Queue3] + fetchqueues[Queue4]));
// top up Queue0 by four whenever it holds eight or fewer
if (fetchqueues[Queue0] <= 8)
fetchqueues[Queue0] += 4;
}
// Return the `serializes` entry of the timing table for the opcode of `instr`.
static int serializes(PCode *instr) {
    int flag;

    flag = instruction_timing[instr->op].serializes;
    return flag;
}
// Return 1 when the timing table places `instr` in the VPU_1 stage, else 0.
static int uses_vpermute_unit_altivec(PCode *instr) {
    if (instruction_timing[instr->op].stage != VPU_1)
        return 0;
    return 1;
}
// Machine description for the 7450 model; the scheduler selects it when
// copts.scheduling == 10.
// NOTE(review): the three leading numbers are presumably the MachineInfo
// fields the scheduler reads as x0 (upper bound of the per-cycle issue loop),
// x4 (tested against zero when serializing register definitions) and x8
// (added to the latency of edges leading to a branch) — confirm against the
// MachineInfo declaration.
MachineInfo machine7450 = {
6,
1,
4,
&latency,
&initialize,
&can_issue,
&issue,
&advance_clock,
&serializes,
&uses_vpermute_unit_altivec
};

View File

@@ -0,0 +1,547 @@
#include "compiler/Scheduler.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/Alias.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/Registers.h"
#ifdef __MWERKS__
#pragma options align=mac68k
#endif
// One node of the dependency graph built for a basic block; each node wraps a
// single instruction.
typedef struct DGNode {
// next node in the doubly linked node list (adddgnode stores the old head here)
struct DGNode *x0;
// previous node in the list (adddgnode sets it on the old head)
struct DGNode *x4;
// edges to the nodes that must be scheduled after this one
struct DGSuccessor *successors;
// the instruction this node stands for
PCode *instr;
// latency of the instruction as reported by MI->latency()
UInt16 x10;
// earliest cycle at which the node may be selected; raised by holdoffsuccessors()
UInt16 x12;
// deadline: criticalpath - x16, filled in by computedeadlines()
UInt16 x14;
// longest latency path starting at this node; starts as the own latency and
// is raised by addsuccessor()
UInt16 x16;
// number of not yet scheduled nodes that list this node as a successor
short predCount;
} DGNode;
// One outgoing edge of a DGNode.
typedef struct DGSuccessor {
// next edge of the same source node
struct DGSuccessor *next;
// node the edge points to
DGNode *node;
// cycles that must pass between scheduling the source and the target
UInt16 x8;
} DGSuccessor;
// Singly linked list cell referring to a DGNode; used for the per-register
// and per-resource use/definition lists.
typedef struct DGNodeList {
struct DGNodeList *next;
DGNode *node;
} DGNodeList;
#ifdef __MWERKS__
#pragma options align=reset
#endif
// Per register class: one list per register of the nodes reading it.
static DGNodeList **register_uses[RegClassMax];
// Per register class: one list per register of the nodes writing it.
static DGNodeList **register_defs[RegClassMax];
// Nodes recorded by serialize_load() (and by stores flagged fPCodeFlag40000).
static DGNodeList *memory_uses;
// Nodes recorded by serialize_store().
static DGNodeList *memory_defs;
// Nodes that findsuccessors() serialized against every other node.
static DGNodeList *side_effects;
// Nodes whose instruction has fIsVolatile set.
static DGNodeList *volatile_refs;
// Node of the most recently visited branch; nodes without any successor get
// an edge to it.
static DGNode *defaultsuccessor;
// Largest DGNode.x16 seen while building the graph of the current block.
static UInt16 criticalpath;
// Machine model selected by scheduleinstructions().
static MachineInfo *MI;
// Allocate and clear the per-register use/definition lists for every register
// class and reset the other resource lists and the critical path length.
static void initresources(void) {
    int rclass;
    int reg;
    int count;

    for (rclass = 0; (char) rclass < RegClassMax; rclass++) {
        char rc = (char) rclass;

        count = used_virtual_registers[rc];
        register_uses[rc] = oalloc(sizeof(DGNodeList *) * count);
        register_defs[rc] = oalloc(sizeof(DGNodeList *) * count);

        for (reg = 0; reg < count; reg++) {
            register_defs[rc][reg] = NULL;
            register_uses[rc][reg] = NULL;
        }
    }

    memory_defs = NULL;
    memory_uses = NULL;
    side_effects = NULL;
    volatile_refs = NULL;
    criticalpath = 0;
}
// Allocate a dependency-graph node for `instr`. The node starts unlinked,
// without successors or predecessors; x10 and x16 both start as the latency
// the machine model reports for the instruction.
static DGNode *makedgnode(PCode *instr) {
    DGNode *fresh = oalloc(sizeof(DGNode));
    UInt16 cycles;

    fresh->instr = instr;
    fresh->successors = NULL;
    fresh->x0 = NULL;
    fresh->x4 = NULL;

    cycles = MI->latency(instr);
    fresh->x16 = cycles;
    fresh->x10 = cycles;

    fresh->x12 = 0;
    fresh->x14 = 0;
    fresh->predCount = 0;

    return fresh;
}
// Push `node` in front of the list starting at `head` (which may be NULL)
// and return the new head, i.e. `node`.
static DGNode *adddgnode(DGNode *head, DGNode *node) {
    node->x0 = head;
    if (head != NULL)
        head->x4 = node;
    return node;
}
// Unlink `node` from the doubly linked list starting at `head` and return the
// (possibly changed) head of the list.
static DGNode *removedgnode(DGNode *head, DGNode *node) {
    DGNode *before = node->x4;
    DGNode *after = node->x0;

    if (before != NULL)
        before->x0 = after;
    else
        head = after;   // node was the first element

    if (after != NULL)
        after->x4 = before;

    return head;
}
// Prepend a newly allocated cell referring to `node` to `*list`.
static void addtolist(DGNodeList **list, DGNode *node) {
    DGNodeList *cell;

    cell = oalloc(sizeof(DGNodeList));
    cell->next = *list;
    cell->node = node;
    *list = cell;
}
// Allocate a stand-alone list cell (next == NULL) referring to `node`.
static DGNodeList *makedglistnode(DGNode *node) {
    DGNodeList *cell;

    cell = oalloc(sizeof(DGNodeList));
    cell->node = node;
    cell->next = NULL;
    return cell;
}
// Compare two operands. Returns 1 when they are of the same kind and, for
// immediates, registers, memory references and labels, also carry the same
// payload; returns 0 otherwise. Operands of any other kind compare equal as
// soon as their kinds match.
int is_same_operand(PCodeArg *a, PCodeArg *b) {
    if (a->kind != b->kind)
        return 0;

    switch (a->kind) {
        case PCOp_IMMEDIATE:
            return a->data.imm.value == b->data.imm.value;
        case PCOp_REGISTER:
            // same register class and same register number
            return (char) a->arg == (char) b->arg &&
                   a->data.reg.reg == b->data.reg.reg;
        case PCOp_MEMORY:
            return a->data.mem.offset == b->data.mem.offset &&
                   a->data.mem.obj == b->data.mem.obj;
        case PCOp_LABEL:
            return a->data.label.label == b->data.label.label;
        default:
            return 1;
    }
}
// Record that `b` has to be scheduled after `a`.
//   flag - when set, the edge carries the latency of `a` (a->x10), and the
//          machine's x8 value is added for a new edge that leads to a branch;
//          when clear, the edge latency is 0.
// An existing a->b edge is only widened, never duplicated. Whenever the edge
// latency is set, a->x16 is raised to b->x16 + latency if that is larger.
// A self edge (a == b) is ignored.
static void addsuccessor(DGNode *a, DGNode *b, Boolean flag) {
    DGSuccessor *edge;
    int delay;

    if (a == b)
        return;

    delay = flag ? a->x10 : 0;
    if (delay < 0)
        delay = 0;

    // reuse an existing edge to the same target
    for (edge = a->successors; edge; edge = edge->next) {
        if (edge->node != b)
            continue;

        if (edge->x8 < delay) {
            edge->x8 = delay;
            if (b->x16 + edge->x8 > a->x16)
                a->x16 = b->x16 + edge->x8;
        }
        return;
    }

    // no edge yet: prepend a new one
    edge = oalloc(sizeof(DGSuccessor));
    edge->node = b;
    edge->next = a->successors;
    a->successors = edge;
    edge->x8 = delay;

    if (flag && (edge->node->instr->flags & fIsBranch))
        edge->x8 += MI->x8;

    b->predCount++;

    if (b->x16 + edge->x8 > a->x16)
        a->x16 = b->x16 + edge->x8;
}
// Add a zero-latency edge from `node` to every node of the list `nodes`.
static void serializeall(DGNode *nodes, DGNode *node) {
    DGNode *cur = nodes;

    while (cur) {
        addsuccessor(node, cur, 0);
        cur = cur->x0;
    }
}
// Add a zero-latency edge from `node` to every other node referenced by `list`.
static void serializelist(DGNode *node, DGNodeList *list) {
    DGNodeList *cell;

    for (cell = list; cell; cell = cell->next) {
        if (cell->node == node)
            continue;
        addsuccessor(node, cell->node, 0);
    }
}
// Order `node` relative to the nodes already recorded for one register.
//   rclass  - register class of the register
//   defs    - list of nodes writing the register
//   uses    - list of nodes reading the register
//   isWrite - non-zero when `node` writes the register
// A write gets an edge (with latency) to every recorded reader and an edge to
// every recorded writer; a read gets an edge to every recorded writer. Edges
// to writers carry latency only for RegClass_SPR or when MI->x4 is zero.
// Afterwards `node` is recorded in `*defs` (write) or `*uses` (read).
static void serializeregister(int rclass, DGNode *node, DGNodeList **defs, DGNodeList **uses, int isWrite) {
    DGNodeList *cell;
    int defLatency;

    defLatency = ((char) rclass == RegClass_SPR) || (MI->x4 == 0);

    if (isWrite) {
        for (cell = *uses; cell; cell = cell->next) {
            if (cell->node != node)
                addsuccessor(node, cell->node, 1);
        }
    }

    for (cell = *defs; cell; cell = cell->next) {
        if (cell->node != node)
            addsuccessor(node, cell->node, defLatency);
    }

    cell = makedglistnode(node);
    if (isWrite) {
        cell->next = *defs;
        *defs = cell;
    } else {
        cell->next = *uses;
        *uses = cell;
    }
}
// Order a memory read: add an edge (with latency) to every recorded memory
// definition that may_alias() reports as aliasing, then record the node in
// memory_uses.
static void serialize_load(DGNode *node) {
    DGNodeList *cell = memory_defs;

    while (cell) {
        if (may_alias(node->instr, cell->node->instr))
            addsuccessor(node, cell->node, 1);
        cell = cell->next;
    }

    addtolist(&memory_uses, node);
}
// Order a memory write: add an edge (with latency) to every recorded memory
// use and every recorded memory definition that may_alias() reports as
// aliasing, then record the node in memory_defs. Instructions flagged
// fPCodeFlag40000 are recorded in memory_uses as well.
static void serialize_store(DGNode *node) {
    DGNodeList *cell;

    cell = memory_uses;
    while (cell) {
        if (may_alias(node->instr, cell->node->instr))
            addsuccessor(node, cell->node, 1);
        cell = cell->next;
    }

    cell = memory_defs;
    while (cell) {
        if (may_alias(node->instr, cell->node->instr))
            addsuccessor(node, cell->node, 1);
        cell = cell->next;
    }

    addtolist(&memory_defs, node);
    if (node->instr->flags & fPCodeFlag40000)
        addtolist(&memory_uses, node);
}
// Compute the outgoing edges of `node` against everything recorded so far.
//   nodes - list of the nodes created before `node`
//   node  - the node to connect
// Register operands are ordered through serializeregister(), memory accesses
// through serialize_load()/serialize_store(), volatile accesses against each
// other. Linking calls/branches, instructions with side effects and
// instructions the machine model reports as serializing are ordered against
// every node. A node that ends up without successors is tied to the default
// successor, and criticalpath is raised to the node's path length.
//
// Two fixes compared to the previous version:
//   - The register bounds check rejected only reg > used_virtual_registers[],
//     although the per-register arrays hold exactly that many entries, so an
//     index equal to the count slipped through. It now rejects reg >= count.
//   - `instr->flags & fIsCall | fIsBranch` parsed as
//     `(instr->flags & fIsCall) | fIsBranch`, which is always true for a
//     non-zero fIsBranch; the mask is now parenthesized.
static void findsuccessors(DGNode *nodes, DGNode *node) {
    PCode *instr;
    PCodeArg *op;
    int i;

    instr = node->instr;

    for (i = 0, op = instr->args; i < instr->argCount; i++, op++) {
        switch (op->kind) {
            case PCOp_IMMEDIATE:
            case PCOp_MEMORY:
                break;
            case PCOp_REGISTER:
                if (
                    op->data.reg.reg < 0 ||
                    op->data.reg.reg >= used_virtual_registers[(char) op->arg]
                )
                {
                    CError_FATAL(491);
                }

                if (op->arg == RegClass_GPR) {
                    // Register2 never creates a dependency
                    if (op->data.reg.reg == Register2)
                        break;
                    // Register0 only counts when it is actually read or written
                    if (op->data.reg.reg == Register0 && !(op->data.reg.effect & (EffectRead | EffectWrite)))
                        break;
                }

                serializeregister(
                    op->arg,
                    node,
                    &register_defs[(char) op->arg][op->data.reg.reg],
                    &register_uses[(char) op->arg][op->data.reg.reg],
                    op->data.reg.effect & EffectWrite
                );
                break;
            default:
                break;
        }
    }

    if (instr->flags & (fIsRead | fPCodeFlag20000))
        serialize_load(node);
    else if (instr->flags & (fIsWrite | fPCodeFlag40000))
        serialize_store(node);

    if (instr->flags & fIsVolatile) {
        serializelist(node, volatile_refs);
        addtolist(&volatile_refs, node);
    }

    if (
        ((instr->flags & (fIsCall | fIsBranch)) && (instr->flags & fLink)) ||
        (instr->flags & fSideEffects) ||
        MI->serializes(instr)
    )
    {
        serializeall(nodes, node);
        addtolist(&side_effects, node);
    }

    if (side_effects)
        serializelist(node, side_effects);

    if (!node->successors && defaultsuccessor)
        addsuccessor(node, defaultsuccessor, 0);

    if (node->x16 > criticalpath)
        criticalpath = node->x16;
}
// Set the deadline (x14) of every node in the list to criticalpath - x16.
static void computedeadlines(DGNode *nodes) {
    DGNode *cur;

    for (cur = nodes; cur; cur = cur->x0)
        cur->x14 = criticalpath - cur->x16;
}
// Count the successors of `node` whose predCount is exactly 1, i.e. the ones
// for which `node` is the only remaining predecessor.
static int uncovering(DGNode *node) {
    DGSuccessor *edge = node->successors;
    int freed = 0;

    while (edge) {
        if (edge->node->predCount == 1)
            freed++;
        edge = edge->next;
    }

    return freed;
}
// Pick the node to schedule in cycle `counter`, or return NULL when no node
// of the list is ready. A node is ready when it has no unscheduled
// predecessor, its earliest cycle (x12) has been reached and the machine
// model accepts its instruction.
static DGNode *selectinstruction(DGNode *nodes, UInt16 counter) {
DGNode *node;
DGNode *node2;
int a;
int b;
// the first ready node is the initial choice
node = nodes;
while (node) {
if (node->predCount == 0 && node->x12 <= counter && MI->can_issue(node->instr))
break;
node = node->x0;
}
if (!node)
return NULL;
// compare the current choice against every later ready node
for (node2 = node->x0; node2; node2 = node2->x0) {
if (
node2->predCount == 0 &&
node2->x12 <= counter &&
MI->can_issue(node2->instr) &&
// skip the candidate when the current choice has reached its deadline
// (x14) and the candidate has not
(node->x14 > counter || node2->x14 <= counter)
)
{
// a candidate at or past its deadline beats a choice that is not
if (node->x14 > counter && node2->x14 <= counter) {
node = node2;
continue;
}
// prefer the node that makes more successors ready
if ((a = uncovering(node)) > (b = uncovering(node2)))
continue;
if (a < b) {
node = node2;
continue;
}
// then the node with the longer remaining path (x16)
if (node->x16 > node2->x16)
continue;
if (node->x16 < node2->x16) {
node = node2;
continue;
}
// last tie-break, only when `coloring` is set: prefer the smaller
// opcodeinfo[].x9 value
// NOTE(review): the meaning of x9 is not visible here — confirm.
if (coloring) {
if (opcodeinfo[node->instr->op].x9 < opcodeinfo[node2->instr->op].x9)
continue;
if (opcodeinfo[node->instr->op].x9 > opcodeinfo[node2->instr->op].x9)
node = node2;
}
}
}
return node;
}
// `node` has just been scheduled in cycle `counter`: drop it from the
// predecessor count of each successor and make sure no successor can be
// selected before counter + edge latency.
static void holdoffsuccessors(DGNode *node, UInt16 counter) {
    DGSuccessor *edge = node->successors;

    while (edge) {
        DGNode *target = edge->node;
        int earliest = counter + edge->x8;

        target->predCount--;
        if (target->x12 < earliest)
            target->x12 = earliest;

        edge = edge->next;
    }
}
// Reorder the instructions of one basic block.
// The dependency graph is built by walking the block from its last
// instruction to its first; the block is then emptied and refilled by
// repeatedly selecting ready nodes, at most MI->x0 per simulated cycle.
static void scheduleblock(PCodeBlock *block) {
    DGNode *pending = NULL;
    DGNode *chosen;
    PCode *pc;
    UInt16 cycle;
    UInt16 slot;

    initresources();
    defaultsuccessor = NULL;

    // build the graph back to front
    pc = block->lastPCode;
    while (pc) {
        DGNode *fresh = makedgnode(pc);

        findsuccessors(pending, fresh);
        if (pc->flags & fIsBranch)
            defaultsuccessor = fresh;
        pending = adddgnode(pending, fresh);

        pc = pc->prevPCode;
    }

    computedeadlines(pending);

    // empty the block; appendpcode() refills it in scheduled order
    block->lastPCode = NULL;
    block->firstPCode = NULL;
    block->pcodeCount = 0;

    MI->initialize();

    for (cycle = 0; pending != NULL; cycle++) {
        for (slot = 0; slot < MI->x0 && pending != NULL; slot++) {
            chosen = selectinstruction(pending, cycle);
            if (!chosen)
                break;

            pc = chosen->instr;
            if (chosen->successors)
                holdoffsuccessors(chosen, cycle);

            appendpcode(block, pc);
            MI->issue(pc);
            pending = removedgnode(pending, chosen);
        }

        MI->advance_clock();
    }

    freeoheap();
}
// Schedule every eligible basic block of the current function.
//   flag - when set, prolog and epilogue blocks are scheduled too
// The machine model is chosen from copts.scheduling (and copts.altivec_model);
// unknown values fall back to the 603 model. A block is eligible when it has
// more than two instructions and has not been scheduled before.
void scheduleinstructions(Boolean flag) {
    PCodeBlock *blk;
    int model;

    model = copts.scheduling;

    if (model == 10) {
        MI = &machine7450;
    } else if (copts.altivec_model != 0) {
        MI = &machine7400;
    } else {
        switch (model) {
            case 7:
                MI = &machine7400;
                break;
            case 2:
                MI = &machine603;
                break;
            case 5:
                MI = &machine603e;
                break;
            case 3:
            case 6:
                MI = &machine604;
                break;
            case 4:
                MI = &machine750;
                break;
            case 1:
                MI = &machine601;
                break;
            case 9:
                MI = &machine821;
                break;
            default:
                MI = &machine603;
                break;
        }
    }

    gather_alias_info();

    for (blk = pcbasicblocks; blk; blk = blk->nextBlock) {
        if (blk->pcodeCount <= 2)
            continue;
        if (!flag && (blk->flags & (fIsProlog | fIsEpilogue)))
            continue;
        if (blk->flags & fScheduled)
            continue;

        scheduleblock(blk);
        blk->flags |= fScheduled;
    }
}
// Return 1 when `a` reads or writes the register that `b` writes through its
// first operand, provided that operand is a register of class `rclass`.
// Returns 0 in every other case, including b == NULL.
int is_dependent(PCode *a, PCode *b, char rclass) {
    PCodeArg *dst;
    PCodeArg *op;
    int target;
    int i;

    if (!b || b->argCount < 1)
        return 0;

    dst = &b->args[0];
    if (dst->kind != PCOp_REGISTER)
        return 0;
    if ((char) dst->arg != rclass)
        return 0;
    if (!(dst->data.reg.effect & EffectWrite))
        return 0;

    target = dst->data.reg.reg;

    for (i = 0, op = a->args; i < a->argCount; i++, op++) {
        if (op->kind != PCOp_REGISTER)
            continue;
        if ((char) op->arg != rclass)
            continue;
        if (!(op->data.reg.effect & (EffectRead | EffectWrite)))
            continue;
        if (op->data.reg.reg == target)
            return 1;
    }

    return 0;
}
// Ask the selected AltiVec machine model whether `instr` uses the vector
// permute unit. Only the 7450 model (copts.scheduling == 10) and the 7400
// model (copts.altivec_model != 0 or copts.scheduling == 7) are consulted;
// for every other configuration the answer is 0.
int uses_vpermute_unit(PCode *instr) {
    MachineInfo *machine;
    int model;

    model = copts.scheduling;

    if (model == 10)
        machine = &machine7450;
    else if (copts.altivec_model != 0 || model == 7)
        machine = &machine7400;
    else
        return 0;

    return machine->uses_vpermute_unit(instr);
}
// Always returns 0; the argument is ignored.
int default_uses_vpermute_unit(PCode *instr) {
    (void) instr;
    return 0;
}