MWCC/compiler_and_linker/BackEnd/PowerPC/GlobalOptimizer/LoopOptimization.c

1554 lines
54 KiB
C

#include "compiler/LoopOptimization.h"
#include "compiler/CFunc.h"
#include "compiler/CParser.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/LoopDetection.h"
#include "compiler/PCode.h"
#include "compiler/Registers.h"
#include "compiler/UseDefChains.h"
#include "compiler/objects.h"
#include "compiler/types.h"
int optimizedloops;
int optimizedloop_full_unroll;
int optimizedloop_trans_regs;
static UInt32 *liveonexit;
static UInt32 *inductionvariables;
static int last_virtual_GPR;
static int ispowerof2(SInt32 value) {
int bit = getbit(value);
return (bit > 0 && bit < 31) ? bit : 0;
}
static void insertupdateinstructions(Loop *loop) {
// nothing
}
static void computeliveonexit(Loop *loop) {
UInt32 *usevec;
UInt32 *defvec;
BlockList *blocklist;
RegUseOrDef *list;
int gpr;
bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0);
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24))
bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses);
}
bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0);
if (loop->preheader)
bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs);
bitvectorinitialize(liveonexit, last_virtual_GPR, 0);
for (gpr = 32; gpr < last_virtual_GPR; gpr++) {
for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) {
if (bitvectorgetbit(list->id, defvec)) {
if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) {
bitvectorsetbit(gpr, liveonexit);
break;
}
}
}
for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) {
if (bitvectorgetbit(list->id, usevec)) {
if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) {
bitvectorsetbit(gpr, liveonexit);
break;
}
}
}
}
}
static void eliminateinductionvariables(Loop *loop) {
BlockList *blocklist;
PCode *instr;
PCode *nextInstr;
PCodeArg *op;
int i;
bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF);
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) {
if (
instr->op != PC_ADDI ||
instr->args[0].data.reg.reg < 32 ||
instr->args[0].data.reg.reg >= last_virtual_GPR ||
instr->args[1].data.reg.reg != instr->args[0].data.reg.reg
) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
op->data.reg.reg < last_virtual_GPR
)
bitvectorclearbit(op->data.reg.reg, inductionvariables);
op++;
}
}
}
}
if (loop->parent) {
for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) {
nextInstr = instr->nextPCode;
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
op->data.reg.reg < last_virtual_GPR
)
bitvectorsetbit(op->data.reg.reg, liveonexit);
op++;
}
}
}
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) {
nextInstr = instr->nextPCode;
if (
instr->op == PC_ADDI &&
instr->args[0].data.reg.reg >= 32 &&
instr->args[0].data.reg.reg < last_virtual_GPR &&
instr->args[1].data.reg.reg == instr->args[0].data.reg.reg &&
bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) &&
!bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit)
) {
deletepcode(instr);
optimizedloops = 1;
}
}
}
}
static void skiplooptest(Loop *loop) {
PCodeBlock *preheader;
PCodeLabel *label;
PCLink **ptr;
PCLink *link;
PCode *lastInstr;
PCode *instr;
preheader = loop->preheader;
lastInstr = loop->body->lastPCode;
CError_ASSERT(340, lastInstr->args[2].kind == PCOp_LABEL);
label = lastInstr->args[2].data.label.label;
preheader->lastPCode->args[0].data.label.label = label;
preheader->successors->block = label->block;
ptr = &loop->body->predecessors;
while ((link = *ptr)) {
if (link->block == preheader) {
*ptr = link->nextLink;
break;
}
ptr = &link->nextLink;
}
link->nextLink = label->block->predecessors;
label->block->predecessors = link;
while (1) {
instr = loop->body->firstPCode;
CError_ASSERT(369, instr);
if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
break;
deletepcode(instr);
insertpcodebefore(loop->preheader->lastPCode, instr);
loop->bodySize--;
}
for (instr = instr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
insertpcodebefore(loop->preheader->lastPCode, copypcode(instr));
}
static void unrollloop(Loop *loop) {
PCodeBlock *newBlock;
int i;
int factor;
PCode *instr;
PCodeBlock *block;
PCode *firstInstr;
PCode *nextInstr;
for (factor = copts.unroll_factor_limit; factor > 1; factor--) {
if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit)
break;
}
if (factor == 1)
return;
if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4)
return;
newBlock = oalloc(sizeof(PCodeBlock));
newBlock->firstPCode = newBlock->lastPCode = NULL;
for (i = 0; i < factor - 1; i++) {
firstInstr = loop->body->firstPCode;
CError_ASSERT(448, firstInstr);
if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI)
CError_FATAL(450);
for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
appendpcode(newBlock, copypcode(instr));
for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (instr->op != PC_B)
appendpcode(newBlock, copypcode(instr));
}
}
}
block = loop->body->predecessors->block;
for (instr = newBlock->firstPCode; instr; instr = nextInstr) {
nextInstr = instr->nextPCode;
appendpcode(block, instr);
}
loop->iterationCount /= factor;
}
void pccomputepredecessors1(PCodeBlock *block) {
PCLink *succ;
PCLink *pred;
for (succ = block->successors; succ; succ = succ->nextLink) {
if (!succ->block) {
CError_FATAL(496);
} else {
for (pred = succ->block->predecessors; pred; pred = pred->nextLink) {
if (pred->block == block)
break;
}
if (!pred) {
pred = lalloc(sizeof(PCLink));
pred->block = block;
pred->nextLink = succ->block->predecessors;
succ->block->predecessors = pred;
}
}
}
}
static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) {
PCodeBlock *newBlock;
PCodeBlock *nextBlock;
PCodeLabel *label;
PCLink *prev;
PCLink *link;
label = makepclabel();
newBlock = lalloc(sizeof(PCodeBlock));
nextBlock = block->nextBlock;
newBlock->nextBlock = nextBlock;
block->nextBlock = newBlock;
newBlock->prevBlock = block;
nextBlock->prevBlock = newBlock;
newBlock->labels = NULL;
newBlock->predecessors = newBlock->successors = NULL;
newBlock->firstPCode = newBlock->lastPCode = NULL;
newBlock->pcodeCount = 0;
newBlock->loopWeight = loopWeight;
newBlock->flags = 0;
newBlock->blockIndex = pcblockcount++;
pclabel(newBlock, label);
prev = NULL;
for (link = block->successors; link; link = link->nextLink) {
if (link->block == nextBlock) {
if (!prev)
block->successors = link->nextLink;
else
prev->nextLink = link->nextLink;
link->nextLink = NULL;
newBlock->successors = link;
break;
}
prev = link;
}
prev = NULL;
for (link = nextBlock->predecessors; link; link = link->nextLink) {
if (link->block == block) {
if (!prev)
nextBlock->predecessors = link->nextLink;
else
prev->nextLink = link->nextLink;
link->nextLink = NULL;
newBlock->predecessors = link;
break;
}
prev = link;
}
link = lalloc(sizeof(PCLink));
link->block = newBlock;
link->nextLink = block->successors;
block->successors = link;
link = lalloc(sizeof(PCLink));
link->block = newBlock;
link->nextLink = nextBlock->predecessors;
nextBlock->predecessors = link;
return newBlock;
}
static void unrollloopconditional(Loop *loop) {
PCodeBlock *lastBlock;
PCodeLabel *label24;
PCode *instr;
PCode *instrCopy;
int outputBlockCount;
int inputBlockCount;
PCodeBlock **blocks;
PCodeBlock **blocks2;
PCodeBlock **blocks3;
PCode *firstInstr;
int factor;
PCodeBlock *block;
PCLink *link;
PCLink *prev;
int i;
int j;
int k;
int instructionCount;
label24 = NULL;
outputBlockCount = 0;
instructionCount = 0;
for (factor = copts.unroll_factor_limit; factor > 1; factor--) {
if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.unroll_instr_limit)
break;
}
if (factor == 1)
return;
inputBlockCount = 0;
for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) {
inputBlockCount++;
if (!bitvectorgetbit(block->blockIndex, loop->memberblocks))
instructionCount += block->pcodeCount;
}
if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8)
return;
blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *));
blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *));
blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *)));
memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *));
memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *));
memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *)));
block = loop->preheader->nextBlock;
for (i = 0; i < inputBlockCount; i++) {
blocks[i] = block;
block = block->nextBlock;
}
lastBlock = blocks[inputBlockCount - 1];
for (i = 0; i < factor - 1; i++) {
for (j = 0; j < inputBlockCount; j++) {
blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight);
blocks3[outputBlockCount++] = blocks2[j];
lastBlock = blocks2[j];
}
if (label24) {
pclabel(blocks2[0], label24);
label24 = NULL;
}
for (j = 0; j < inputBlockCount; j++) {
for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) {
if (instr->flags & fIsBranch) {
PCodeArg *op;
int opID;
instrCopy = copypcode(instr);
op = NULL;
for (opID = 0; opID < instr->argCount; opID++) {
if (instr->args[opID].kind == PCOp_LABEL) {
op = &instr->args[opID];
break;
}
}
if (op) {
if (op->data.label.label->block == loop->body) {
if (!label24)
label24 = makepclabel();
instrCopy->args[opID].data.label.label = label24;
} else {
for (k = 0; k < inputBlockCount; k++) {
if (op->data.label.label->block == blocks[k]) {
instrCopy->args[opID].data.label.label = blocks2[k]->labels;
break;
}
}
}
}
appendpcode(blocks2[j], instrCopy);
if (op)
pcbranch(blocks2[j], instrCopy->args[opID].data.label.label);
} else {
appendpcode(blocks2[j], copypcode(instr));
}
}
}
firstInstr = loop->body->firstPCode;
CError_ASSERT(762, firstInstr != NULL);
if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI)
CError_FATAL(764);
for (instr = firstInstr->nextPCode; instr && !(instr->flags & fIsBranch); instr = instr->nextPCode)
appendpcode(blocks2[inputBlockCount - 1], copypcode(instr));
for (j = 0; j < inputBlockCount; j++) {
for (link = blocks[j]->successors; link; link = link->nextLink) {
if (link->block == blocks[j]->nextBlock)
break;
}
if (!link) {
for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) {
if (link->block == blocks2[j]->nextBlock) {
if (prev)
prev->nextLink = link->nextLink;
else
blocks2[j]->successors = link->nextLink;
} else {
prev = link;
}
}
for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) {
if (link->block == blocks2[j]) {
if (prev)
prev->nextLink = link->nextLink;
else
blocks2[j]->nextBlock->predecessors = link->nextLink;
} else {
prev = link;
}
}
}
}
}
if (label24)
pclabel(loop->body, label24);
for (i = 0; i < inputBlockCount; i++) {
for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) {
if (instr->flags & fIsBranch) {
PCodeArg *op;
int opID;
op = NULL;
for (opID = 0; opID < instr->argCount; opID++) {
if (instr->args[opID].kind == PCOp_LABEL) {
op = &instr->args[opID];
break;
}
}
if (op && op->data.label.label->block == loop->body) {
instr->args[opID].data.label.label = blocks3[0]->labels;
for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) {
if (link->block == loop->body) {
if (prev)
prev->nextLink = link->nextLink;
else
blocks[i]->successors = link->nextLink;
} else {
prev = link;
}
}
for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) {
if (link->block == blocks[i]) {
if (prev)
prev->nextLink = link->nextLink;
else
loop->body->predecessors = link->nextLink;
} else {
prev = link;
}
}
link = blocks[i]->successors;
while (link && link->block != blocks3[0])
link = link->nextLink;
if (!link) {
pcbranch(blocks[i], op->data.label.label);
pccomputepredecessors1(blocks[i]);
}
}
}
}
}
for (i = 0; i < outputBlockCount; i++)
pccomputepredecessors1(blocks3[i]);
loop->iterationCount /= factor;
}
static void unrollunknownBDNZ(Loop *loop) {
int factor; // r29
PCodeBlock *preheader; // r17
PCodeBlock *blockA; // r23
PCodeBlock *postheader; // r22
PCodeBlock *newblock1; // r26
PCodeBlock *newblock2; // r31
PCodeBlock *newblock3; // r19
PCodeBlock *newblock4; // r20
PCodeBlock *newblock5; // r24
PCode *mtctr; // r21
int mtctr_reg; // r27
PCode *instr28; // r28
PCode *instr6; // r6
PCodeBlock *block;
PCode *instr;
PCodeArg *op;
int i;
int val;
short mtctr_shifted_reg; // r16
short reg25; // r25
PCLink *link;
if (loop->bodySize < 4)
return;
factor = 128;
while (factor > copts.unroll_factor_limit)
factor >>= 1;
while (factor > 1 && (loop->bodySize - 2) * factor > copts.unroll_instr_limit)
factor >>= 1;
if (factor < 2)
return;
preheader = loop->preheader;
blockA = loop->body->nextBlock;
postheader = preheader->nextBlock;
newblock1 = insertnewpcblock(preheader, loop->loopWeight);
newblock2 = insertnewpcblock(newblock1, loop->loopWeight);
newblock3 = insertnewpcblock(newblock2, loop->loopWeight);
newblock4 = insertnewpcblock(newblock3, loop->loopWeight);
newblock5 = insertnewpcblock(newblock4, loop->loopWeight);
addblocktoloop(loop, newblock1);
addblocktoloop(loop, newblock2);
addblocktoloop(loop, newblock3);
addblocktoloop(loop, newblock4);
addblocktoloop(loop, newblock5);
for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) {
if (instr->op == PC_MTCTR) {
mtctr = instr;
mtctr_reg = instr->args[0].data.reg.reg;
}
}
if (!mtctr)
return;
instr28 = NULL;
for (block = postheader; block != loop->body; block = block->successors->block) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (instr->op != PC_B) {
appendpcode(newblock2, copypcode(instr));
if (instr == loop->pc18)
instr28 = newblock2->lastPCode;
}
}
}
if (!instr28) {
appendpcode(newblock2, copypcode(loop->pc18));
instr28 = newblock2->lastPCode;
}
instr6 = NULL;
for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) {
if (instr != instr28) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == loop->pc18->args[0].data.reg.reg &&
(op->data.reg.effect & (EffectRead | EffectWrite))
) {
instr6 = instr;
break;
}
op++;
}
}
if (instr6)
break;
}
if (!instr6) {
deletepcode(instr28);
deletepcode(loop->pc18);
if (loop->footer)
blockA = loop->footer;
else
blockA = insertnewpcblock(loop->body, loop->loopWeight);
} else {
instr28 = NULL;
}
for (i = 1; i < factor; i++) {
for (block = postheader; block != loop->body; block = block->successors->block) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (instr->op != PC_B)
appendpcode(newblock2, copypcode(instr));
}
}
}
mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++;
appendpcode(newblock1, makepcode(
PC_RLWINM,
mtctr_shifted_reg,
mtctr_reg,
32 - ispowerof2(factor),
ispowerof2(factor),
31));
appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0));
if (instr28) {
reg25 = used_virtual_registers[RegClass_GPR]++;
if (loop->step == 1) {
instr = makepcode(PC_MR, reg25, mtctr_reg);
} else if (loop->step == -1) {
instr = makepcode(PC_NEG, reg25, mtctr_reg);
} else {
val = ispowerof2(abs(loop->step));
if (val > 0) {
instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val);
if (loop->step < 0) {
appendpcode(newblock1, instr);
instr = makepcode(PC_NEG, reg25, reg25);
}
} else {
instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step);
}
}
appendpcode(newblock1, instr);
}
appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg));
appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels));
pcbranch(newblock1, newblock5->labels);
link = lalloc(sizeof(PCLink));
link->block = newblock1;
link->nextLink = newblock5->predecessors;
newblock5->predecessors = link;
appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels));
pcbranch(newblock3, newblock2->labels);
link = lalloc(sizeof(PCLink));
link->block = newblock3;
link->nextLink = newblock2->predecessors;
newblock2->predecessors = link;
appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1));
appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels));
pcbranch(newblock4, blockA->labels);
link = lalloc(sizeof(PCLink));
link->block = newblock4;
link->nextLink = blockA->predecessors;
blockA->predecessors = link;
deletepcode(mtctr);
appendpcode(newblock5, mtctr);
if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) {
instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25);
if (blockA->firstPCode)
insertpcodebefore(blockA->firstPCode, instr);
else
appendpcode(blockA, instr);
}
}
static void deleteloop(Loop *loop) {
PCodeBlock *body;
PCodeBlock *preheader;
PCodeBlock *nextblock;
PCodeLabel *label;
PCode *instr;
PCLink **ptr;
PCLink *link;
PCodeBlock *block;
body = loop->body;
preheader = loop->preheader;
nextblock = body->nextBlock;
label = makepclabel();
while (1) {
instr = body->firstPCode;
CError_ASSERT(1294, instr != NULL);
if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
break;
deletepcode(instr);
insertpcodebefore(loop->preheader->lastPCode, instr);
loop->bodySize--;
}
pclabel(nextblock, label);
preheader->lastPCode->args[0].data.label.label = label;
preheader->successors->block = nextblock;
ptr = &nextblock->predecessors;
while ((link = *ptr)) {
if (link->block == body) {
link->block = preheader;
break;
}
ptr = &link->nextLink;
}
block = pcbasicblocks;
while ((nextblock = block->nextBlock)) {
if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks))
block->nextBlock = nextblock->nextBlock;
else
block = nextblock;
}
}
static void deleteloopheader(Loop *loop) {
PCLink *link;
PCLink **ptr;
ptr = &loop->body->successors;
while ((link = *ptr)) {
if (link->block == loop->body->lastPCode->args[2].data.label.label->block) {
*ptr = link->nextLink;
break;
}
ptr = &link->nextLink;
}
ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors;
while ((link = *ptr)) {
if (link->block == loop->body) {
*ptr = link->nextLink;
break;
}
ptr = &link->nextLink;
}
deletepcode(loop->body->firstPCode);
deletepcode(loop->body->lastPCode);
optimizedloop_full_unroll = 1;
}
static void rewriteloopwithBDNZ(Loop *loop) {
PCode *instr;
if (!FITS_IN_SHORT(loop->iterationCount)) {
instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount));
insertpcodebefore(loop->preheader->lastPCode, instr);
if (loop->iterationCount != 0)
insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount)));
} else {
instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount);
insertpcodebefore(loop->preheader->lastPCode, instr);
}
insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg));
instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
deletepcode(loop->body->firstPCode);
deletepcode(loop->body->lastPCode);
appendpcode(loop->body, instr);
for (loop = loop->parent; loop; loop = loop->parent)
loop->x4E = 1;
}
static void rewriteunknownloopwithBDNZ(Loop *loop) {
SInt32 value1; // r24
SInt32 value2; // r30
Opcode branchOpcode; // r19
SInt32 absStep; // r28
int branchCondition; // r20
int counterReg; // r27
int reg1; // r26
int reg2; // r22
unsigned char mode; // r23
PCodeBlock *afterLoop; // r25
PCode *addiInstr;
PCode *instr;
PCLink *link;
PCLink **ptr;
absStep = abs(loop->step);
afterLoop = NULL;
for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) {
deletepcode(addiInstr);
if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode)
insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr);
else
appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr);
afterLoop = insertnewpcblock(loop->body, loop->loopWeight);
appendpcode(afterLoop, copypcode(addiInstr));
loop->footer = afterLoop;
}
if (loop->unknownCondition == ELESS) {
branchOpcode = PC_BF;
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
branchCondition = 1; // GT
value1 = loop->lower;
reg1 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1 - loop->lower;
mode = 0;
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
branchCondition = 0; // LT
value1 = loop->upper;
reg1 = addiInstr->args[1].data.reg.reg;
value2 = absStep - 1 + loop->upper;
mode = 1;
} else {
branchCondition = 0; // LT
reg1 = addiInstr->args[1].data.reg.reg;
reg2 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1;
mode = 2;
}
} else if (loop->unknownCondition == ELESSEQU) {
branchOpcode = PC_BT;
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
branchCondition = 0; // LT
value1 = loop->lower;
reg1 = addiInstr->args[2].data.reg.reg;
value2 = absStep - loop->lower;
mode = 0;
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
branchCondition = 1; // GT
value1 = loop->upper;
reg1 = addiInstr->args[1].data.reg.reg;
value2 = absStep + loop->upper;
mode = 1;
} else {
branchCondition = 1; // GT
value1 = 0;
reg1 = addiInstr->args[1].data.reg.reg;
reg2 = addiInstr->args[2].data.reg.reg;
value2 = absStep;
mode = 2;
}
} else if (loop->unknownCondition == EGREATER) {
branchOpcode = PC_BF;
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
branchCondition = 0; // LT
value1 = loop->lower;
reg1 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1 + loop->lower;
mode = 1;
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
branchCondition = 1; // GT
value1 = loop->upper;
reg1 = addiInstr->args[1].data.reg.reg;
value2 = absStep - 1 - loop->upper;
mode = 0;
} else {
branchCondition = 1; // GT
value1 = 0;
reg1 = addiInstr->args[1].data.reg.reg;
reg2 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1;
mode = 3;
}
} else if (loop->unknownCondition == EGREATEREQU) {
branchOpcode = PC_BT;
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
branchCondition = 1; // GT
value1 = loop->lower;
reg1 = addiInstr->args[2].data.reg.reg;
value2 = absStep + loop->lower;
mode = 1;
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
branchCondition = 0; // LT
value1 = loop->upper;
reg1 = addiInstr->args[1].data.reg.reg;
value2 = absStep - loop->upper;
mode = 0;
} else {
branchCondition = 0; // LT
reg1 = addiInstr->args[1].data.reg.reg;
reg2 = addiInstr->args[2].data.reg.reg;
value2 = absStep;
mode = 3;
}
} else if (loop->unknownCondition == ENOTEQU) {
branchOpcode = PC_BT;
branchCondition = 2; // EQ
if (loop->step > 0) {
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
value1 = loop->lower;
reg1 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1 - loop->lower;
mode = 0;
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
value1 = loop->upper;
reg1 = addiInstr->args[1].data.reg.reg;
value2 = absStep - 1 + loop->upper;
mode = 1;
} else {
reg1 = addiInstr->args[1].data.reg.reg;
reg2 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1;
mode = 2;
}
} else {
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
value1 = loop->lower;
reg1 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1 + loop->lower;
mode = 1;
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
value1 = loop->upper;
reg1 = addiInstr->args[1].data.reg.reg;
value2 = absStep - 1 - loop->upper;
mode = 0;
} else {
reg1 = addiInstr->args[1].data.reg.reg;
reg2 = addiInstr->args[2].data.reg.reg;
value2 = absStep - 1;
mode = 3;
}
}
}
while (1) {
instr = loop->body->firstPCode;
CError_ASSERT(1857, instr);
if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
break;
deletepcode(instr);
insertpcodebefore(loop->preheader->lastPCode, instr);
loop->bodySize--;
}
// build a value for mtctr
counterReg = used_virtual_registers[RegClass_GPR]++;
if (mode == 1) {
if (value2 == 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_NEG, counterReg, reg1));
} else if (FITS_IN_SHORT(value2)) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBFIC, counterReg, reg1, value2));
} else {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2)));
if (value2 != 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
}
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBF, counterReg, reg1, counterReg));
}
} else if (mode == 0) {
if (value2 == 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_MR, counterReg, reg1));
} else if (FITS_IN_SHORT(value2)) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, reg1, 0, value2));
} else {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
if (value2 != 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
}
}
} else if (mode == 2) {
if (value2 == 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBF, counterReg, reg1, reg2));
} else if (FITS_IN_SHORT(value2)) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, reg2, 0, value2));
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBF, counterReg, reg1, counterReg));
} else {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2)));
if (value2 != 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
}
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBF, counterReg, reg1, counterReg));
}
} else {
if (value2 == 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBF, counterReg, reg2, reg1));
} else {
if (FITS_IN_SHORT(value2)) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, reg1, 0, value2));
} else {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
if (value2 != 0) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
}
}
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_SUBF, counterReg, reg2, counterReg));
}
}
if (absStep > 1) {
unsigned char bits;
if ((bits = ispowerof2(absStep))) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31));
} else {
int reg = used_virtual_registers[RegClass_GPR]++;
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_LI, reg, absStep));
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_DIVWU, counterReg, counterReg, reg));
}
}
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_MTCTR, counterReg));
if (mode < 2) {
if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
if (FITS_IN_USHORT(value1)) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_CMPLI, 0, reg1, value1));
} else {
PCode *tmp;
tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
insertpcodebefore(loop->preheader->lastPCode, tmp);
if (loop->iterationCount != 0)
insertpcodeafter(tmp,
makepcode(PC_ADDI,
tmp->args[0].data.reg.reg,
tmp->args[0].data.reg.reg,
0, LOW_PART(value1)));
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg));
}
} else {
if (FITS_IN_SHORT(value1)) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_CMPI, 0, reg1, value1));
} else {
PCode *tmp;
tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
insertpcodebefore(loop->preheader->lastPCode, tmp);
if (loop->iterationCount != 0)
insertpcodeafter(tmp,
makepcode(PC_ADDI,
tmp->args[0].data.reg.reg,
tmp->args[0].data.reg.reg,
0, LOW_PART(value1)));
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg));
}
}
} else {
if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_CMPL, 0, reg1, reg2));
} else {
insertpcodebefore(loop->preheader->lastPCode,
makepcode(PC_CMP, 0, reg1, reg2));
}
}
if (!afterLoop)
afterLoop = loop->body->nextBlock;
instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels);
deletepcode(loop->preheader->lastPCode);
appendpcode(loop->preheader, instr);
instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
deletepcode(loop->body->firstPCode);
deletepcode(loop->body->lastPCode);
appendpcode(loop->body, instr);
loop->preheader->successors = NULL;
ptr = &loop->body->predecessors;
while ((link = *ptr)) {
if (link->block == loop->preheader) {
*ptr = link->nextLink;
break;
}
ptr = &link->nextLink;
}
link = lalloc(sizeof(PCLink));
link->block = loop->preheader->nextBlock;
link->nextLink = loop->preheader->successors;
loop->preheader->successors = link;
link = lalloc(sizeof(PCLink));
link->block = loop->preheader;
link->nextLink = loop->preheader->nextBlock->predecessors;
loop->preheader->nextBlock->predecessors = link;
link = lalloc(sizeof(PCLink));
link->block = afterLoop;
link->nextLink = loop->preheader->successors;
loop->preheader->successors = link;
link = lalloc(sizeof(PCLink));
link->block = loop->preheader;
link->nextLink = afterLoop->predecessors;
afterLoop->predecessors = link;
for (loop = loop->parent; loop; loop = loop->parent)
loop->x4E = 1;
}
static void optimizeloop(Loop *loop) {
computeliveonexit(loop);
if (loop->x53 || loop->x54)
insertupdateinstructions(loop);
if (loop->isKnownCountingLoop) {
if (loop->iterationCount > 0) {
skiplooptest(loop);
if (!copts.optimizesize && !loop->x4D && !loop->x57) {
if (loop->x4F)
unrollloop(loop);
else if (!loop->x4E)
unrollloopconditional(loop);
}
}
if (loop->iterationCount != 0) {
if (loop->iterationCount == 1)
deleteloopheader(loop);
else if (!loop->x4E && !loop->x4D)
rewriteloopwithBDNZ(loop);
}
optimizedloops = 1;
} else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) {
rewriteunknownloopwithBDNZ(loop);
if (copts.unroll_speculative && !copts.optimizesize) {
if (loop->x4F && !loop->x57 && !loop->x52)
unrollunknownBDNZ(loop);
}
optimizedloops = 1;
}
eliminateinductionvariables(loop);
}
typedef struct LocalArray {
struct LocalArray *next;
Object *object;
unsigned int invalid:1;
unsigned int isFloat:1;
unsigned int isSigned:1;
SInt32 elementSize;
SInt32 arraySize;
SInt32 elementCount;
int totalUses;
int elements[1];
} LocalArray;
static LocalArray *scanforlocalarrays(void) {
SInt32 elementCount;
LocalArray *head;
LocalArray *array;
ObjectList *list;
int i;
SInt32 arraySize;
SInt32 elementSize;
head = NULL;
for (list = locals; list; list = list->next) {
if (
list->object &&
!(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
list->object->type &&
IS_TYPE_ARRAY(list->object->type) &&
TPTR_TARGET(list->object->type) &&
TPTR_TARGET(list->object->type)->type < TYPESTRUCT
) {
arraySize = list->object->type->size;
elementSize = TPTR_TARGET(list->object->type)->size;
elementCount = arraySize / elementSize;
if (elementCount > 0 && elementCount <= 8) {
array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray));
array->next = head;
head = array;
array->object = list->object;
array->elementSize = elementSize;
array->arraySize = arraySize;
array->elementCount = elementCount;
array->totalUses = 0;
array->isSigned = 1;
array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type));
array->invalid = 0;
array->isSigned = 1;
if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type)))
array->isSigned = 0;
for (i = 0; i < elementCount; i++) {
array->elements[i] = 0;
}
}
}
}
return head;
}
static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) {
while (arrays) {
if (arrays->object == object)
return arrays;
arrays = arrays->next;
}
return NULL;
}
void changearraytoregisters(void) {
LocalArray *arrays;
PCodeBlock *block;
PCode *instr;
int i;
PCodeArg *op;
LocalArray **ptr;
LocalArray *array;
int intCounter;
int floatCounter;
int reg;
int reg2;
PCode *newInstr;
arrays = NULL;
if ((arrays = scanforlocalarrays())) {
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_MEMORY &&
(PCOpMemoryArg) op->arg == PCOpMemory1 &&
(array = lookup_array_object(arrays, op->data.mem.obj)) &&
!array->invalid
) {
if (
(instr->flags & (fIsRead | fIsWrite)) &&
(op->data.mem.offset % array->elementSize) == 0 &&
op->data.mem.offset < array->arraySize
) {
switch (instr->op) {
case PC_LBZ:
case PC_STB:
if (array->elementSize != 1)
array->invalid = 1;
break;
case PC_LHZ:
case PC_LHA:
case PC_STH:
if (array->elementSize != 2)
array->invalid = 1;
break;
case PC_LWZ:
case PC_STW:
if (array->elementSize != 4)
array->invalid = 1;
break;
case PC_LFD:
case PC_STFD:
if (array->elementSize != 8)
array->invalid = 1;
break;
default:
array->invalid = 1;
break;
}
if (!array->invalid)
array->elements[op->data.mem.offset / array->elementSize]++;
} else {
array->invalid = 1;
}
}
op++;
}
}
}
}
intCounter = 0;
floatCounter = 0;
ptr = &arrays;
array = *ptr;
while (array) {
if (array->invalid) {
*ptr = array = array->next;
continue;
}
if (array->isFloat)
floatCounter += array->elementCount;
if (!array->isFloat)
intCounter += array->elementCount;
for (i = 0; i < array->elementCount; i++)
array->totalUses += array->elements[i];
array = array->next;
}
if (arrays) {
while (intCounter > 8) {
LocalArray *best;
int score;
score = 0;
best = NULL;
for (array = arrays; array; array = array->next) {
if (!array->isFloat) {
if (best) {
if (array->totalUses < score) {
score = array->totalUses;
best = array;
}
} else {
best = array;
score = array->totalUses;
}
}
}
if (!best)
break;
if (best == arrays) {
arrays = best->next;
} else {
for (array = arrays; array; array = array->next) {
if (array->next == best) {
array->next = best->next;
break;
}
}
}
intCounter -= best->elementCount;
}
while (floatCounter > 8) {
LocalArray *best;
int score;
score = 0;
best = NULL;
for (array = arrays; array; array = array->next) {
if (array->isFloat) {
if (best) {
if (array->totalUses < score) {
score = array->totalUses;
best = array;
}
} else {
best = array;
score = array->totalUses;
}
}
}
if (!best)
break;
if (best == arrays) {
arrays = best->next;
} else {
for (array = arrays; array; array = array->next) {
if (array->next == best) {
array->next = best->next;
break;
}
}
}
floatCounter -= best->elementCount;
}
CError_ASSERT(2394, intCounter <= 8 && floatCounter <= 8);
if (!arrays)
return;
optimizedloop_trans_regs = 1;
for (array = arrays; array; array = array->next) {
for (i = 0; i < array->elementCount; i++) {
if (array->isFloat)
array->elements[i] = used_virtual_registers[RegClass_FPR]++;
else
array->elements[i] = used_virtual_registers[RegClass_GPR]++;
}
}
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (
!(instr->flags & fIsBranch) &&
instr->argCount &&
(instr->flags & (fIsRead | fIsWrite)) &&
instr->args[2].kind == PCOp_MEMORY &&
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
(array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) &&
!(array->invalid)
)
{
reg2 = instr->args[0].data.reg.reg;
reg = array->elements[instr->args[2].data.mem.offset / array->elementSize];
newInstr = NULL;
switch (instr->op) {
case PC_LBZ:
if (array->isSigned)
newInstr = makepcode(PC_MR, reg2, reg);
else
newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31);
break;
case PC_STB:
if (array->isSigned)
newInstr = makepcode(PC_EXTSB, reg, reg2);
else
newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31);
break;
case PC_LHZ:
newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31);
break;
case PC_LHA:
newInstr = makepcode(PC_EXTSH, reg2, reg);
break;
case PC_STH:
if (array->isSigned)
newInstr = makepcode(PC_EXTSH, reg, reg2);
else
newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31);
break;
case PC_LWZ:
newInstr = makepcode(PC_MR, reg2, reg);
break;
case PC_STW:
newInstr = makepcode(PC_MR, reg, reg2);
break;
case PC_LFD:
newInstr = makepcode(PC_FMR, reg2, reg);
break;
case PC_STFD:
newInstr = makepcode(PC_FMR, reg, reg2);
break;
default:
CError_FATAL(2494);
break;
}
if (newInstr) {
insertpcodebefore(instr, newInstr);
deletepcode(instr);
instr = newInstr;
}
}
}
}
}
}
freeoheap();
}
static void optimizenestedloops(Loop *loop) {
while (loop) {
if (loop->children)
optimizenestedloops(loop->children);
optimizeloop(loop);
loop = loop->nextSibling;
}
}
void optimizeloops(void) {
optimizedloops = 0;
optimizedloop_full_unroll = 0;
optimizedloop_trans_regs = 0;
if (loopsinflowgraph) {
computeusedefchains(0);
last_virtual_GPR = used_virtual_registers[RegClass_GPR];
liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5));
inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5));
optimizenestedloops(loopsinflowgraph);
freeoheap();
}
}