mirror of https://git.wuffs.org/MWCC
1567 lines
54 KiB
C
1567 lines
54 KiB
C
#include "compiler/LoopOptimization.h"
|
|
#include "compiler/CFunc.h"
|
|
#include "compiler/CParser.h"
|
|
#include "compiler/BitVectors.h"
|
|
#include "compiler/CompilerTools.h"
|
|
#include "compiler/LoopDetection.h"
|
|
#include "compiler/PCode.h"
|
|
#include "compiler/Registers.h"
|
|
#include "compiler/UseDefChains.h"
|
|
#include "compiler/objects.h"
|
|
#include "compiler/types.h"
|
|
|
|
int optimizedloops;
|
|
int optimizedloop_full_unroll;
|
|
int optimizedloop_trans_regs;
|
|
static UInt32 *liveonexit;
|
|
static UInt32 *inductionvariables;
|
|
static int last_virtual_GPR;
|
|
|
|
static int ispowerof2(SInt32 value) {
|
|
int bit = getbit(value);
|
|
return (bit > 0 && bit < 31) ? bit : 0;
|
|
}
|
|
|
|
static void insertupdateinstructions(Loop *loop) {
|
|
// nothing
|
|
}
|
|
|
|
static void computeliveonexit(Loop *loop) {
|
|
UInt32 *usevec;
|
|
UInt32 *defvec;
|
|
BlockList *blocklist;
|
|
RegUseOrDef *list;
|
|
int gpr;
|
|
|
|
bitvectorinitialize(usevec = oalloc(4 * ((number_of_Uses + 31) >> 5)), number_of_Uses, 0);
|
|
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
|
|
if (bitvectorgetbit(blocklist->block->blockIndex, loop->vec24))
|
|
bitvectorunion(usevec, usedefinfo[blocklist->block->blockIndex].usevec1C, number_of_Uses);
|
|
}
|
|
|
|
bitvectorinitialize(defvec = oalloc(4 * ((number_of_Defs + 31) >> 5)), number_of_Defs, 0);
|
|
if (loop->preheader)
|
|
bitvectorunion(defvec, usedefinfo[loop->preheader->blockIndex].defvec8, number_of_Defs);
|
|
|
|
bitvectorinitialize(liveonexit, last_virtual_GPR, 0);
|
|
|
|
for (gpr = 32; gpr < last_virtual_GPR; gpr++) {
|
|
for (list = reg_Defs[RegClass_GPR][gpr]; list; list = list->next) {
|
|
if (bitvectorgetbit(list->id, defvec)) {
|
|
if (!Defs[list->id].pcode->block || bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) {
|
|
bitvectorsetbit(gpr, liveonexit);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (list = reg_Uses[RegClass_GPR][gpr]; list; list = list->next) {
|
|
if (bitvectorgetbit(list->id, usevec)) {
|
|
if (!Uses[list->id].pcode->block || !bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) {
|
|
bitvectorsetbit(gpr, liveonexit);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void eliminateinductionvariables(Loop *loop) {
|
|
BlockList *blocklist;
|
|
PCode *instr;
|
|
PCode *nextInstr;
|
|
PCodeArg *op;
|
|
int i;
|
|
|
|
bitvectorinitialize(inductionvariables, last_virtual_GPR, 0xFFFFFFFF);
|
|
|
|
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
|
|
for (instr = blocklist->block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (
|
|
instr->op != PC_ADDI ||
|
|
instr->args[0].data.reg.reg < 32 ||
|
|
instr->args[0].data.reg.reg >= last_virtual_GPR ||
|
|
instr->args[1].data.reg.reg != instr->args[0].data.reg.reg
|
|
) {
|
|
op = instr->args;
|
|
i = instr->argCount;
|
|
while (i--) {
|
|
if (
|
|
op->kind == PCOp_REGISTER &&
|
|
op->arg == RegClass_GPR &&
|
|
op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
|
|
op->data.reg.reg < last_virtual_GPR
|
|
)
|
|
bitvectorclearbit(op->data.reg.reg, inductionvariables);
|
|
op++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (loop->parent) {
|
|
for (instr = loop->preheader->firstPCode; instr; instr = nextInstr) {
|
|
nextInstr = instr->nextPCode;
|
|
op = instr->args;
|
|
i = instr->argCount;
|
|
while (i--) {
|
|
if (
|
|
op->kind == PCOp_REGISTER &&
|
|
op->arg == RegClass_GPR &&
|
|
op->data.reg.reg >= n_real_registers[RegClass_GPR] &&
|
|
op->data.reg.reg < last_virtual_GPR
|
|
)
|
|
bitvectorsetbit(op->data.reg.reg, liveonexit);
|
|
op++;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
|
|
for (instr = blocklist->block->firstPCode; instr; instr = nextInstr) {
|
|
nextInstr = instr->nextPCode;
|
|
if (
|
|
instr->op == PC_ADDI &&
|
|
instr->args[0].data.reg.reg >= 32 &&
|
|
instr->args[0].data.reg.reg < last_virtual_GPR &&
|
|
instr->args[1].data.reg.reg == instr->args[0].data.reg.reg &&
|
|
bitvectorgetbit(instr->args[0].data.reg.reg, inductionvariables) &&
|
|
!bitvectorgetbit(instr->args[0].data.reg.reg, liveonexit)
|
|
) {
|
|
deletepcode(instr);
|
|
optimizedloops = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void skiplooptest(Loop *loop) {
|
|
PCodeBlock *preheader;
|
|
PCodeLabel *label;
|
|
PCLink **ptr;
|
|
PCLink *link;
|
|
PCode *lastInstr;
|
|
PCode *instr;
|
|
|
|
preheader = loop->preheader;
|
|
lastInstr = loop->body->lastPCode;
|
|
#line 340
|
|
CError_ASSERT(lastInstr->args[2].kind == PCOp_LABEL);
|
|
|
|
label = lastInstr->args[2].data.label.label;
|
|
preheader->lastPCode->args[0].data.label.label = label;
|
|
preheader->successors->block = label->block;
|
|
|
|
ptr = &loop->body->predecessors;
|
|
while ((link = *ptr)) {
|
|
if (link->block == preheader) {
|
|
*ptr = link->nextLink;
|
|
break;
|
|
}
|
|
ptr = &link->nextLink;
|
|
}
|
|
|
|
link->nextLink = label->block->predecessors;
|
|
label->block->predecessors = link;
|
|
|
|
while (1) {
|
|
instr = loop->body->firstPCode;
|
|
#line 369
|
|
CError_ASSERT(instr);
|
|
|
|
if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
|
|
break;
|
|
|
|
deletepcode(instr);
|
|
insertpcodebefore(loop->preheader->lastPCode, instr);
|
|
loop->bodySize--;
|
|
}
|
|
|
|
for (instr = instr->nextPCode; instr && !(instr->flags & fPCodeFlag1); instr = instr->nextPCode)
|
|
insertpcodebefore(loop->preheader->lastPCode, copypcode(instr));
|
|
}
|
|
|
|
static void unrollloop(Loop *loop) {
|
|
PCodeBlock *newBlock;
|
|
int i;
|
|
int factor;
|
|
PCode *instr;
|
|
PCodeBlock *block;
|
|
PCode *firstInstr;
|
|
PCode *nextInstr;
|
|
|
|
for (factor = copts.ppc_unroll_factor_limit; factor > 1; factor--) {
|
|
if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.ppc_unroll_instructions_limit)
|
|
break;
|
|
}
|
|
|
|
if (factor == 1)
|
|
return;
|
|
if ((loop->iterationCount / factor) != 1 && loop->bodySize < 4)
|
|
return;
|
|
|
|
newBlock = oalloc(sizeof(PCodeBlock));
|
|
newBlock->firstPCode = newBlock->lastPCode = NULL;
|
|
for (i = 0; i < factor - 1; i++) {
|
|
firstInstr = loop->body->firstPCode;
|
|
#line 448
|
|
CError_ASSERT(firstInstr);
|
|
if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) {
|
|
#line 450
|
|
CError_FATAL();
|
|
}
|
|
|
|
for (instr = firstInstr->nextPCode; instr && !(instr->flags & fPCodeFlag1); instr = instr->nextPCode)
|
|
appendpcode(newBlock, copypcode(instr));
|
|
|
|
for (block = loop->preheader->successors->block; block != loop->body; block = block->successors->block) {
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (instr->op != PC_B)
|
|
appendpcode(newBlock, copypcode(instr));
|
|
}
|
|
}
|
|
}
|
|
|
|
block = loop->body->predecessors->block;
|
|
for (instr = newBlock->firstPCode; instr; instr = nextInstr) {
|
|
nextInstr = instr->nextPCode;
|
|
appendpcode(block, instr);
|
|
}
|
|
loop->iterationCount /= factor;
|
|
}
|
|
|
|
void pccomputepredecessors1(PCodeBlock *block) {
|
|
PCLink *succ;
|
|
PCLink *pred;
|
|
|
|
for (succ = block->successors; succ; succ = succ->nextLink) {
|
|
if (!succ->block) {
|
|
#line 496
|
|
CError_FATAL();
|
|
} else {
|
|
for (pred = succ->block->predecessors; pred; pred = pred->nextLink) {
|
|
if (pred->block == block)
|
|
break;
|
|
}
|
|
|
|
if (!pred) {
|
|
pred = lalloc(sizeof(PCLink));
|
|
pred->block = block;
|
|
pred->nextLink = succ->block->predecessors;
|
|
succ->block->predecessors = pred;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static PCodeBlock *insertnewpcblock(PCodeBlock *block, int loopWeight) {
|
|
PCodeBlock *newBlock;
|
|
PCodeBlock *nextBlock;
|
|
PCodeLabel *label;
|
|
PCLink *prev;
|
|
PCLink *link;
|
|
|
|
label = makepclabel();
|
|
newBlock = lalloc(sizeof(PCodeBlock));
|
|
|
|
nextBlock = block->nextBlock;
|
|
newBlock->nextBlock = nextBlock;
|
|
block->nextBlock = newBlock;
|
|
|
|
newBlock->prevBlock = block;
|
|
nextBlock->prevBlock = newBlock;
|
|
|
|
newBlock->labels = NULL;
|
|
newBlock->predecessors = newBlock->successors = NULL;
|
|
newBlock->firstPCode = newBlock->lastPCode = NULL;
|
|
newBlock->pcodeCount = 0;
|
|
newBlock->loopWeight = loopWeight;
|
|
newBlock->flags = 0;
|
|
newBlock->blockIndex = pcblockcount++;
|
|
pclabel(newBlock, label);
|
|
|
|
prev = NULL;
|
|
for (link = block->successors; link; link = link->nextLink) {
|
|
if (link->block == nextBlock) {
|
|
if (!prev)
|
|
block->successors = link->nextLink;
|
|
else
|
|
prev->nextLink = link->nextLink;
|
|
link->nextLink = NULL;
|
|
newBlock->successors = link;
|
|
break;
|
|
}
|
|
prev = link;
|
|
}
|
|
|
|
prev = NULL;
|
|
for (link = nextBlock->predecessors; link; link = link->nextLink) {
|
|
if (link->block == block) {
|
|
if (!prev)
|
|
nextBlock->predecessors = link->nextLink;
|
|
else
|
|
prev->nextLink = link->nextLink;
|
|
link->nextLink = NULL;
|
|
newBlock->predecessors = link;
|
|
break;
|
|
}
|
|
prev = link;
|
|
}
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = newBlock;
|
|
link->nextLink = block->successors;
|
|
block->successors = link;
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = newBlock;
|
|
link->nextLink = nextBlock->predecessors;
|
|
nextBlock->predecessors = link;
|
|
|
|
return newBlock;
|
|
}
|
|
|
|
static void unrollloopconditional(Loop *loop) {
|
|
PCodeBlock *lastBlock;
|
|
PCodeLabel *label24;
|
|
PCode *instr;
|
|
PCode *instrCopy;
|
|
int outputBlockCount;
|
|
int inputBlockCount;
|
|
PCodeBlock **blocks;
|
|
PCodeBlock **blocks2;
|
|
PCodeBlock **blocks3;
|
|
PCode *firstInstr;
|
|
int factor;
|
|
PCodeBlock *block;
|
|
PCLink *link;
|
|
PCLink *prev;
|
|
int i;
|
|
int j;
|
|
int k;
|
|
int instructionCount;
|
|
|
|
label24 = NULL;
|
|
outputBlockCount = 0;
|
|
instructionCount = 0;
|
|
|
|
for (factor = copts.ppc_unroll_factor_limit; factor > 1; factor--) {
|
|
if ((loop->iterationCount % factor) == 0 && (loop->bodySize - 2) * factor <= copts.ppc_unroll_instructions_limit)
|
|
break;
|
|
}
|
|
|
|
if (factor == 1)
|
|
return;
|
|
|
|
inputBlockCount = 0;
|
|
for (block = loop->preheader->successors->block; block != loop->body; block = block->nextBlock) {
|
|
inputBlockCount++;
|
|
if (!bitvectorgetbit(block->blockIndex, loop->memberblocks))
|
|
instructionCount += block->pcodeCount;
|
|
}
|
|
|
|
if ((loop->bodySize - instructionCount - 2) < instructionCount || instructionCount > 8)
|
|
return;
|
|
|
|
blocks = oalloc(inputBlockCount * sizeof(PCodeBlock *));
|
|
blocks2 = oalloc(inputBlockCount * sizeof(PCodeBlock *));
|
|
blocks3 = oalloc(factor * (inputBlockCount * sizeof(PCodeBlock *)));
|
|
memclrw(blocks, inputBlockCount * sizeof(PCodeBlock *));
|
|
memclrw(blocks2, inputBlockCount * sizeof(PCodeBlock *));
|
|
memclrw(blocks3, factor * (inputBlockCount * sizeof(PCodeBlock *)));
|
|
|
|
block = loop->preheader->nextBlock;
|
|
for (i = 0; i < inputBlockCount; i++) {
|
|
blocks[i] = block;
|
|
block = block->nextBlock;
|
|
}
|
|
|
|
lastBlock = blocks[inputBlockCount - 1];
|
|
for (i = 0; i < factor - 1; i++) {
|
|
for (j = 0; j < inputBlockCount; j++) {
|
|
blocks2[j] = insertnewpcblock(lastBlock, loop->loopWeight);
|
|
blocks3[outputBlockCount++] = blocks2[j];
|
|
lastBlock = blocks2[j];
|
|
}
|
|
if (label24) {
|
|
pclabel(blocks2[0], label24);
|
|
label24 = NULL;
|
|
}
|
|
|
|
for (j = 0; j < inputBlockCount; j++) {
|
|
for (instr = blocks[j]->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (instr->flags & fPCodeFlag1) {
|
|
PCodeArg *op;
|
|
int opID;
|
|
instrCopy = copypcode(instr);
|
|
op = NULL;
|
|
for (opID = 0; opID < instr->argCount; opID++) {
|
|
if (instr->args[opID].kind == PCOp_LABEL) {
|
|
op = &instr->args[opID];
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (op) {
|
|
if (op->data.label.label->block == loop->body) {
|
|
if (!label24)
|
|
label24 = makepclabel();
|
|
instrCopy->args[opID].data.label.label = label24;
|
|
} else {
|
|
for (k = 0; k < inputBlockCount; k++) {
|
|
if (op->data.label.label->block == blocks[k]) {
|
|
instrCopy->args[opID].data.label.label = blocks2[k]->labels;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
appendpcode(blocks2[j], instrCopy);
|
|
if (op)
|
|
pcbranch(blocks2[j], instrCopy->args[opID].data.label.label);
|
|
} else {
|
|
appendpcode(blocks2[j], copypcode(instr));
|
|
}
|
|
}
|
|
}
|
|
|
|
firstInstr = loop->body->firstPCode;
|
|
#line 762
|
|
CError_ASSERT(firstInstr != NULL);
|
|
if (firstInstr->op != PC_CMP && firstInstr->op != PC_CMPL && firstInstr->op != PC_CMPI && firstInstr->op != PC_CMPLI) {
|
|
#line 764
|
|
CError_FATAL();
|
|
}
|
|
|
|
for (instr = firstInstr->nextPCode; instr && !(instr->flags & fPCodeFlag1); instr = instr->nextPCode)
|
|
appendpcode(blocks2[inputBlockCount - 1], copypcode(instr));
|
|
|
|
for (j = 0; j < inputBlockCount; j++) {
|
|
for (link = blocks[j]->successors; link; link = link->nextLink) {
|
|
if (link->block == blocks[j]->nextBlock)
|
|
break;
|
|
}
|
|
|
|
if (!link) {
|
|
for (link = blocks2[j]->successors, prev = NULL; link; link = link->nextLink) {
|
|
if (link->block == blocks2[j]->nextBlock) {
|
|
if (prev)
|
|
prev->nextLink = link->nextLink;
|
|
else
|
|
blocks2[j]->successors = link->nextLink;
|
|
} else {
|
|
prev = link;
|
|
}
|
|
}
|
|
|
|
for (link = blocks2[j]->nextBlock->predecessors, prev = NULL; link; link = link->nextLink) {
|
|
if (link->block == blocks2[j]) {
|
|
if (prev)
|
|
prev->nextLink = link->nextLink;
|
|
else
|
|
blocks2[j]->nextBlock->predecessors = link->nextLink;
|
|
} else {
|
|
prev = link;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (label24)
|
|
pclabel(loop->body, label24);
|
|
|
|
for (i = 0; i < inputBlockCount; i++) {
|
|
for (instr = blocks[i]->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (instr->flags & fPCodeFlag1) {
|
|
PCodeArg *op;
|
|
int opID;
|
|
op = NULL;
|
|
for (opID = 0; opID < instr->argCount; opID++) {
|
|
if (instr->args[opID].kind == PCOp_LABEL) {
|
|
op = &instr->args[opID];
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (op && op->data.label.label->block == loop->body) {
|
|
instr->args[opID].data.label.label = blocks3[0]->labels;
|
|
|
|
for (link = blocks[i]->successors, prev = NULL; link; link = link->nextLink) {
|
|
if (link->block == loop->body) {
|
|
if (prev)
|
|
prev->nextLink = link->nextLink;
|
|
else
|
|
blocks[i]->successors = link->nextLink;
|
|
} else {
|
|
prev = link;
|
|
}
|
|
}
|
|
|
|
for (link = loop->body->predecessors, prev = NULL; link; link = link->nextLink) {
|
|
if (link->block == blocks[i]) {
|
|
if (prev)
|
|
prev->nextLink = link->nextLink;
|
|
else
|
|
loop->body->predecessors = link->nextLink;
|
|
} else {
|
|
prev = link;
|
|
}
|
|
}
|
|
|
|
link = blocks[i]->successors;
|
|
while (link && link->block != blocks3[0])
|
|
link = link->nextLink;
|
|
|
|
if (!link) {
|
|
pcbranch(blocks[i], op->data.label.label);
|
|
pccomputepredecessors1(blocks[i]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < outputBlockCount; i++)
|
|
pccomputepredecessors1(blocks3[i]);
|
|
|
|
loop->iterationCount /= factor;
|
|
}
|
|
|
|
static void unrollunknownBDNZ(Loop *loop) {
|
|
int factor; // r29
|
|
PCodeBlock *preheader; // r17
|
|
PCodeBlock *blockA; // r23
|
|
PCodeBlock *postheader; // r22
|
|
PCodeBlock *newblock1; // r26
|
|
PCodeBlock *newblock2; // r31
|
|
PCodeBlock *newblock3; // r19
|
|
PCodeBlock *newblock4; // r20
|
|
PCodeBlock *newblock5; // r24
|
|
PCode *mtctr; // r21
|
|
int mtctr_reg; // r27
|
|
PCode *instr28; // r28
|
|
PCode *instr6; // r6
|
|
PCodeBlock *block;
|
|
PCode *instr;
|
|
PCodeArg *op;
|
|
int i;
|
|
int val;
|
|
short mtctr_shifted_reg; // r16
|
|
short reg25; // r25
|
|
PCLink *link;
|
|
|
|
if (loop->bodySize < 4)
|
|
return;
|
|
|
|
factor = 128;
|
|
while (factor > copts.ppc_unroll_factor_limit)
|
|
factor >>= 1;
|
|
while (factor > 1 && (loop->bodySize - 2) * factor > copts.ppc_unroll_instructions_limit)
|
|
factor >>= 1;
|
|
|
|
if (factor < 2)
|
|
return;
|
|
|
|
preheader = loop->preheader;
|
|
blockA = loop->body->nextBlock;
|
|
postheader = preheader->nextBlock;
|
|
|
|
newblock1 = insertnewpcblock(preheader, loop->loopWeight);
|
|
newblock2 = insertnewpcblock(newblock1, loop->loopWeight);
|
|
newblock3 = insertnewpcblock(newblock2, loop->loopWeight);
|
|
newblock4 = insertnewpcblock(newblock3, loop->loopWeight);
|
|
newblock5 = insertnewpcblock(newblock4, loop->loopWeight);
|
|
addblocktoloop(loop, newblock1);
|
|
addblocktoloop(loop, newblock2);
|
|
addblocktoloop(loop, newblock3);
|
|
addblocktoloop(loop, newblock4);
|
|
addblocktoloop(loop, newblock5);
|
|
|
|
for (instr = preheader->lastPCode; instr; instr = instr->prevPCode) {
|
|
if (instr->op == PC_MTCTR) {
|
|
mtctr = instr;
|
|
mtctr_reg = instr->args[0].data.reg.reg;
|
|
}
|
|
}
|
|
|
|
if (!mtctr)
|
|
return;
|
|
|
|
instr28 = NULL;
|
|
for (block = postheader; block != loop->body; block = block->successors->block) {
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (instr->op != PC_B) {
|
|
appendpcode(newblock2, copypcode(instr));
|
|
if (instr == loop->pc18)
|
|
instr28 = newblock2->lastPCode;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!instr28) {
|
|
appendpcode(newblock2, copypcode(loop->pc18));
|
|
instr28 = newblock2->lastPCode;
|
|
}
|
|
|
|
instr6 = NULL;
|
|
for (instr = newblock2->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (instr != instr28) {
|
|
op = instr->args;
|
|
i = instr->argCount;
|
|
while (i--) {
|
|
if (
|
|
op->kind == PCOp_REGISTER &&
|
|
op->arg == RegClass_GPR &&
|
|
op->data.reg.reg == loop->pc18->args[0].data.reg.reg &&
|
|
(op->data.reg.effect & (EffectRead | EffectWrite))
|
|
) {
|
|
instr6 = instr;
|
|
break;
|
|
}
|
|
op++;
|
|
}
|
|
}
|
|
if (instr6)
|
|
break;
|
|
}
|
|
|
|
if (!instr6) {
|
|
deletepcode(instr28);
|
|
deletepcode(loop->pc18);
|
|
if (loop->footer)
|
|
blockA = loop->footer;
|
|
else
|
|
blockA = insertnewpcblock(loop->body, loop->loopWeight);
|
|
} else {
|
|
instr28 = NULL;
|
|
}
|
|
|
|
for (i = 1; i < factor; i++) {
|
|
for (block = postheader; block != loop->body; block = block->successors->block) {
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (instr->op != PC_B)
|
|
appendpcode(newblock2, copypcode(instr));
|
|
}
|
|
}
|
|
}
|
|
|
|
mtctr_shifted_reg = used_virtual_registers[RegClass_GPR]++;
|
|
appendpcode(newblock1, makepcode(
|
|
PC_RLWINM,
|
|
mtctr_shifted_reg,
|
|
mtctr_reg,
|
|
32 - ispowerof2(factor),
|
|
ispowerof2(factor),
|
|
31));
|
|
|
|
appendpcode(newblock1, makepcode(PC_CMPLI, 0, mtctr_shifted_reg, 0));
|
|
|
|
if (instr28) {
|
|
reg25 = used_virtual_registers[RegClass_GPR]++;
|
|
if (loop->step == 1) {
|
|
instr = makepcode(PC_MR, reg25, mtctr_reg);
|
|
} else if (loop->step == -1) {
|
|
instr = makepcode(PC_NEG, reg25, mtctr_reg);
|
|
} else {
|
|
val = ispowerof2(abs(loop->step));
|
|
if (val > 0) {
|
|
instr = makepcode(PC_RLWINM, reg25, mtctr_reg, val, 0, 31 - val);
|
|
if (loop->step < 0) {
|
|
appendpcode(newblock1, instr);
|
|
instr = makepcode(PC_NEG, reg25, reg25);
|
|
}
|
|
} else {
|
|
instr = makepcode(PC_MULLI, reg25, mtctr_reg, loop->step);
|
|
}
|
|
}
|
|
appendpcode(newblock1, instr);
|
|
}
|
|
|
|
appendpcode(newblock1, makepcode(PC_MTCTR, mtctr_shifted_reg));
|
|
appendpcode(newblock1, makepcode(PC_BT, 0, 2, newblock5->labels));
|
|
pcbranch(newblock1, newblock5->labels);
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = newblock1;
|
|
link->nextLink = newblock5->predecessors;
|
|
newblock5->predecessors = link;
|
|
|
|
appendpcode(newblock3, makepcode(PC_BDNZ, newblock2->labels));
|
|
pcbranch(newblock3, newblock2->labels);
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = newblock3;
|
|
link->nextLink = newblock2->predecessors;
|
|
newblock2->predecessors = link;
|
|
|
|
appendpcode(newblock4, makepcode(PC_ANDI, mtctr_reg, mtctr_reg, factor - 1));
|
|
appendpcode(newblock4, makepcode(PC_BT, 0, 2, blockA->labels));
|
|
pcbranch(newblock4, blockA->labels);
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = newblock4;
|
|
link->nextLink = blockA->predecessors;
|
|
blockA->predecessors = link;
|
|
|
|
deletepcode(mtctr);
|
|
appendpcode(newblock5, mtctr);
|
|
|
|
if (instr28 && bitvectorgetbit(instr28->args[0].data.reg.reg, liveonexit)) {
|
|
instr = makepcode(PC_ADD, instr28->args[0].data.reg.reg, instr28->args[0].data.reg.reg, reg25);
|
|
if (blockA->firstPCode)
|
|
insertpcodebefore(blockA->firstPCode, instr);
|
|
else
|
|
appendpcode(blockA, instr);
|
|
}
|
|
}
|
|
|
|
static void deleteloop(Loop *loop) {
|
|
PCodeBlock *body;
|
|
PCodeBlock *preheader;
|
|
PCodeBlock *nextblock;
|
|
PCodeLabel *label;
|
|
PCode *instr;
|
|
PCLink **ptr;
|
|
PCLink *link;
|
|
PCodeBlock *block;
|
|
|
|
body = loop->body;
|
|
preheader = loop->preheader;
|
|
nextblock = body->nextBlock;
|
|
label = makepclabel();
|
|
|
|
while (1) {
|
|
instr = body->firstPCode;
|
|
#line 1294
|
|
CError_ASSERT(instr != NULL);
|
|
|
|
if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
|
|
break;
|
|
|
|
deletepcode(instr);
|
|
insertpcodebefore(loop->preheader->lastPCode, instr);
|
|
loop->bodySize--;
|
|
}
|
|
|
|
pclabel(nextblock, label);
|
|
preheader->lastPCode->args[0].data.label.label = label;
|
|
preheader->successors->block = nextblock;
|
|
|
|
ptr = &nextblock->predecessors;
|
|
while ((link = *ptr)) {
|
|
if (link->block == body) {
|
|
link->block = preheader;
|
|
break;
|
|
}
|
|
ptr = &link->nextLink;
|
|
}
|
|
|
|
block = pcbasicblocks;
|
|
while ((nextblock = block->nextBlock)) {
|
|
if (bitvectorgetbit(nextblock->blockIndex, loop->memberblocks))
|
|
block->nextBlock = nextblock->nextBlock;
|
|
else
|
|
block = nextblock;
|
|
}
|
|
}
|
|
|
|
static void deleteloopheader(Loop *loop) {
|
|
PCLink *link;
|
|
PCLink **ptr;
|
|
|
|
ptr = &loop->body->successors;
|
|
while ((link = *ptr)) {
|
|
if (link->block == loop->body->lastPCode->args[2].data.label.label->block) {
|
|
*ptr = link->nextLink;
|
|
break;
|
|
}
|
|
ptr = &link->nextLink;
|
|
}
|
|
|
|
ptr = &loop->body->lastPCode->args[2].data.label.label->block->predecessors;
|
|
while ((link = *ptr)) {
|
|
if (link->block == loop->body) {
|
|
*ptr = link->nextLink;
|
|
break;
|
|
}
|
|
ptr = &link->nextLink;
|
|
}
|
|
|
|
deletepcode(loop->body->firstPCode);
|
|
deletepcode(loop->body->lastPCode);
|
|
optimizedloop_full_unroll = 1;
|
|
}
|
|
|
|
static void rewriteloopwithBDNZ(Loop *loop) {
|
|
PCode *instr;
|
|
|
|
if (!FITS_IN_SHORT(loop->iterationCount)) {
|
|
instr = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(loop->iterationCount));
|
|
insertpcodebefore(loop->preheader->lastPCode, instr);
|
|
|
|
if (loop->iterationCount != 0)
|
|
insertpcodeafter(instr, makepcode(PC_ADDI, instr->args[0].data.reg.reg, instr->args[0].data.reg.reg, 0, LOW_PART(loop->iterationCount)));
|
|
} else {
|
|
instr = makepcode(PC_LI, used_virtual_registers[RegClass_GPR]++, loop->iterationCount);
|
|
insertpcodebefore(loop->preheader->lastPCode, instr);
|
|
}
|
|
|
|
insertpcodebefore(loop->preheader->lastPCode, makepcode(PC_MTCTR, instr->args[0].data.reg.reg));
|
|
|
|
instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
|
|
deletepcode(loop->body->firstPCode);
|
|
deletepcode(loop->body->lastPCode);
|
|
appendpcode(loop->body, instr);
|
|
|
|
for (loop = loop->parent; loop; loop = loop->parent)
|
|
loop->x4E = 1;
|
|
}
|
|
|
|
static void rewriteunknownloopwithBDNZ(Loop *loop) {
|
|
SInt32 value1; // r24
|
|
SInt32 value2; // r30
|
|
Opcode branchOpcode; // r19
|
|
SInt32 absStep; // r28
|
|
int branchCondition; // r20
|
|
int counterReg; // r27
|
|
int reg1; // r26
|
|
int reg2; // r22
|
|
unsigned char mode; // r23
|
|
PCodeBlock *afterLoop; // r25
|
|
PCode *addiInstr;
|
|
PCode *instr;
|
|
PCLink *link;
|
|
PCLink **ptr;
|
|
|
|
absStep = abs(loop->step);
|
|
afterLoop = NULL;
|
|
|
|
for (addiInstr = loop->body->lastPCode->prevPCode; addiInstr->op == PC_ADDI; addiInstr = loop->body->lastPCode->prevPCode) {
|
|
deletepcode(addiInstr);
|
|
if (loop->body->lastPCode->args[2].data.label.label->block->firstPCode)
|
|
insertpcodebefore(loop->body->lastPCode->args[2].data.label.label->block->firstPCode, addiInstr);
|
|
else
|
|
appendpcode(loop->body->lastPCode->args[2].data.label.label->block, addiInstr);
|
|
|
|
afterLoop = insertnewpcblock(loop->body, loop->loopWeight);
|
|
appendpcode(afterLoop, copypcode(addiInstr));
|
|
loop->footer = afterLoop;
|
|
}
|
|
|
|
if (loop->unknownCondition == ELESS) {
|
|
branchOpcode = PC_BF;
|
|
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 1;
|
|
value1 = loop->lower;
|
|
reg1 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1 - loop->lower;
|
|
mode = 0;
|
|
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 0;
|
|
value1 = loop->upper;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
value2 = absStep - 1 + loop->upper;
|
|
mode = 1;
|
|
} else {
|
|
branchCondition = 0;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
reg2 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1;
|
|
mode = 2;
|
|
}
|
|
} else if (loop->unknownCondition == ELESSEQU) {
|
|
branchOpcode = PC_BT;
|
|
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 0;
|
|
value1 = loop->lower;
|
|
reg1 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - loop->lower;
|
|
mode = 0;
|
|
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 1;
|
|
value1 = loop->upper;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
value2 = absStep + loop->upper;
|
|
mode = 1;
|
|
} else {
|
|
branchCondition = 1;
|
|
value1 = 0;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
reg2 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep;
|
|
mode = 2;
|
|
}
|
|
} else if (loop->unknownCondition == EGREATER) {
|
|
branchOpcode = PC_BF;
|
|
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 0;
|
|
value1 = loop->lower;
|
|
reg1 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1 + loop->lower;
|
|
mode = 1;
|
|
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 1;
|
|
value1 = loop->upper;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
value2 = absStep - 1 - loop->upper;
|
|
mode = 0;
|
|
} else {
|
|
branchCondition = 1;
|
|
value1 = 0;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
reg2 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1;
|
|
mode = 3;
|
|
}
|
|
} else if (loop->unknownCondition == EGREATEREQU) {
|
|
branchOpcode = PC_BT;
|
|
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 1;
|
|
value1 = loop->lower;
|
|
reg1 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep + loop->lower;
|
|
mode = 1;
|
|
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
|
|
branchCondition = 0;
|
|
value1 = loop->upper;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
value2 = absStep - loop->upper;
|
|
mode = 0;
|
|
} else {
|
|
branchCondition = 0;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
reg2 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep;
|
|
mode = 3;
|
|
}
|
|
} else if (loop->unknownCondition == ENOTEQU) {
|
|
branchOpcode = PC_BT;
|
|
branchCondition = 2;
|
|
if (loop->step > 0) {
|
|
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
|
|
value1 = loop->lower;
|
|
reg1 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1 - loop->lower;
|
|
mode = 0;
|
|
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
|
|
value1 = loop->upper;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
value2 = absStep - 1 + loop->upper;
|
|
mode = 1;
|
|
} else {
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
reg2 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1;
|
|
mode = 2;
|
|
}
|
|
} else {
|
|
if (loop->lowerType == LOOP_BOUND_CONSTANT) {
|
|
value1 = loop->lower;
|
|
reg1 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1 + loop->lower;
|
|
mode = 1;
|
|
} else if (loop->upperType == LOOP_BOUND_CONSTANT) {
|
|
value1 = loop->upper;
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
value2 = absStep - 1 - loop->upper;
|
|
mode = 0;
|
|
} else {
|
|
reg1 = addiInstr->args[1].data.reg.reg;
|
|
reg2 = addiInstr->args[2].data.reg.reg;
|
|
value2 = absStep - 1;
|
|
mode = 3;
|
|
}
|
|
}
|
|
}
|
|
|
|
while (1) {
|
|
instr = loop->body->firstPCode;
|
|
#line 1857
|
|
CError_ASSERT(instr);
|
|
|
|
if (instr->op == PC_CMP || instr->op == PC_CMPI || instr->op == PC_CMPLI || instr->op == PC_CMPL)
|
|
break;
|
|
|
|
deletepcode(instr);
|
|
insertpcodebefore(loop->preheader->lastPCode, instr);
|
|
loop->bodySize--;
|
|
}
|
|
|
|
// build a value for mtctr
|
|
counterReg = used_virtual_registers[RegClass_GPR]++;
|
|
|
|
if (mode == 1) {
|
|
if (value2 == 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_NEG, counterReg, reg1));
|
|
} else if (FITS_IN_SHORT(value2)) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBFIC, counterReg, reg1, value2));
|
|
} else {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_LIS, counterReg, 0, HIGH_PART(value2)));
|
|
if (value2 != 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
|
|
}
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBF, counterReg, reg1, counterReg));
|
|
}
|
|
} else if (mode == 0) {
|
|
if (value2 == 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_MR, counterReg, reg1));
|
|
} else if (FITS_IN_SHORT(value2)) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, reg1, 0, value2));
|
|
} else {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
|
|
if (value2 != 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
|
|
}
|
|
}
|
|
} else if (mode == 2) {
|
|
if (value2 == 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBF, counterReg, reg1, reg2));
|
|
} else if (FITS_IN_SHORT(value2)) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, reg2, 0, value2));
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBF, counterReg, reg1, counterReg));
|
|
} else {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDIS, counterReg, reg2, 0, HIGH_PART(value2)));
|
|
if (value2 != 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
|
|
}
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBF, counterReg, reg1, counterReg));
|
|
}
|
|
} else {
|
|
if (value2 == 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBF, counterReg, reg2, reg1));
|
|
} else {
|
|
if (FITS_IN_SHORT(value2)) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, reg1, 0, value2));
|
|
} else {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDIS, counterReg, reg1, 0, HIGH_PART(value2)));
|
|
if (value2 != 0) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_ADDI, counterReg, counterReg, 0, LOW_PART(value2)));
|
|
}
|
|
}
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_SUBF, counterReg, reg2, counterReg));
|
|
}
|
|
}
|
|
|
|
if (absStep > 1) {
|
|
unsigned char bits;
|
|
if ((bits = ispowerof2(absStep))) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_RLWINM, counterReg, counterReg, 32 - bits, bits, 31));
|
|
} else {
|
|
int reg = used_virtual_registers[RegClass_GPR]++;
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_LI, reg, absStep));
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_DIVWU, counterReg, counterReg, reg));
|
|
}
|
|
}
|
|
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_MTCTR, counterReg));
|
|
|
|
if (mode < 2) {
|
|
if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
|
|
if (FITS_IN_USHORT(value1)) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_CMPLI, 0, reg1, value1));
|
|
} else {
|
|
PCode *tmp;
|
|
tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
|
|
insertpcodebefore(loop->preheader->lastPCode, tmp);
|
|
|
|
if (loop->iterationCount != 0)
|
|
insertpcodeafter(tmp,
|
|
makepcode(PC_ADDI,
|
|
tmp->args[0].data.reg.reg,
|
|
tmp->args[0].data.reg.reg,
|
|
0, LOW_PART(value1)));
|
|
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_CMPL, 0, reg1, tmp->args[0].data.reg.reg));
|
|
}
|
|
} else {
|
|
if (FITS_IN_SHORT(value1)) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_CMPI, 0, reg1, value1));
|
|
} else {
|
|
PCode *tmp;
|
|
tmp = makepcode(PC_LIS, used_virtual_registers[RegClass_GPR]++, 0, HIGH_PART(value1));
|
|
insertpcodebefore(loop->preheader->lastPCode, tmp);
|
|
|
|
if (loop->iterationCount != 0)
|
|
insertpcodeafter(tmp,
|
|
makepcode(PC_ADDI,
|
|
tmp->args[0].data.reg.reg,
|
|
tmp->args[0].data.reg.reg,
|
|
0, LOW_PART(value1)));
|
|
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_CMP, 0, reg1, tmp->args[0].data.reg.reg));
|
|
}
|
|
}
|
|
} else {
|
|
if (addiInstr->op == PC_CMPL || addiInstr->op == PC_CMPLI) {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_CMPL, 0, reg1, reg2));
|
|
} else {
|
|
insertpcodebefore(loop->preheader->lastPCode,
|
|
makepcode(PC_CMP, 0, reg1, reg2));
|
|
}
|
|
}
|
|
|
|
if (!afterLoop)
|
|
afterLoop = loop->body->nextBlock;
|
|
|
|
instr = makepcode(branchOpcode, 0, branchCondition, afterLoop->labels);
|
|
deletepcode(loop->preheader->lastPCode);
|
|
appendpcode(loop->preheader, instr);
|
|
|
|
instr = makepcode(PC_BDNZ, loop->body->lastPCode->args[2].data.label.label);
|
|
deletepcode(loop->body->firstPCode);
|
|
deletepcode(loop->body->lastPCode);
|
|
appendpcode(loop->body, instr);
|
|
|
|
loop->preheader->successors = NULL;
|
|
ptr = &loop->body->predecessors;
|
|
while ((link = *ptr)) {
|
|
if (link->block == loop->preheader) {
|
|
*ptr = link->nextLink;
|
|
break;
|
|
}
|
|
ptr = &link->nextLink;
|
|
}
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = loop->preheader->nextBlock;
|
|
link->nextLink = loop->preheader->successors;
|
|
loop->preheader->successors = link;
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = loop->preheader;
|
|
link->nextLink = loop->preheader->nextBlock->predecessors;
|
|
loop->preheader->nextBlock->predecessors = link;
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = afterLoop;
|
|
link->nextLink = loop->preheader->successors;
|
|
loop->preheader->successors = link;
|
|
|
|
link = lalloc(sizeof(PCLink));
|
|
link->block = loop->preheader;
|
|
link->nextLink = afterLoop->predecessors;
|
|
afterLoop->predecessors = link;
|
|
|
|
for (loop = loop->parent; loop; loop = loop->parent)
|
|
loop->x4E = 1;
|
|
}
|
|
|
|
static void optimizeloop(Loop *loop) {
|
|
computeliveonexit(loop);
|
|
|
|
if (loop->x53 || loop->x54)
|
|
insertupdateinstructions(loop);
|
|
|
|
if (loop->isKnownCountingLoop) {
|
|
if (loop->iterationCount > 0) {
|
|
skiplooptest(loop);
|
|
if (!copts.optimize_for_size && !loop->x4D && !loop->x57) {
|
|
if (loop->x4F)
|
|
unrollloop(loop);
|
|
else if (!loop->x4E)
|
|
unrollloopconditional(loop);
|
|
}
|
|
}
|
|
|
|
if (loop->iterationCount != 0) {
|
|
if (loop->iterationCount == 1)
|
|
deleteloopheader(loop);
|
|
else if (!loop->x4E && !loop->x4D)
|
|
rewriteloopwithBDNZ(loop);
|
|
}
|
|
|
|
optimizedloops = 1;
|
|
} else if (loop->isUnknownCountingLoop && !loop->x4E && !loop->x4D) {
|
|
rewriteunknownloopwithBDNZ(loop);
|
|
if (copts.ppc_unroll_speculative && !copts.optimize_for_size) {
|
|
if (loop->x4F && !loop->x57 && !loop->x52)
|
|
unrollunknownBDNZ(loop);
|
|
}
|
|
optimizedloops = 1;
|
|
}
|
|
|
|
eliminateinductionvariables(loop);
|
|
}
|
|
|
|
typedef struct LocalArray {
|
|
struct LocalArray *next;
|
|
Object *object;
|
|
unsigned int invalid:1;
|
|
unsigned int isFloat:1;
|
|
unsigned int isSigned:1;
|
|
SInt32 elementSize;
|
|
SInt32 arraySize;
|
|
SInt32 elementCount;
|
|
int totalUses;
|
|
int elements[1];
|
|
} LocalArray;
|
|
|
|
static LocalArray *scanforlocalarrays(void) {
|
|
SInt32 elementCount;
|
|
LocalArray *head;
|
|
LocalArray *array;
|
|
ObjectList *list;
|
|
int i;
|
|
SInt32 arraySize;
|
|
SInt32 elementSize;
|
|
|
|
head = NULL;
|
|
|
|
for (list = locals; list; list = list->next) {
|
|
if (
|
|
list->object &&
|
|
!(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
|
|
list->object->type &&
|
|
IS_TYPE_ARRAY(list->object->type) &&
|
|
TPTR_TARGET(list->object->type) &&
|
|
TPTR_TARGET(list->object->type)->type < TYPESTRUCT
|
|
) {
|
|
arraySize = list->object->type->size;
|
|
elementSize = TPTR_TARGET(list->object->type)->size;
|
|
elementCount = arraySize / elementSize;
|
|
if (elementCount > 0 && elementCount <= 8) {
|
|
array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalArray));
|
|
array->next = head;
|
|
head = array;
|
|
|
|
array->object = list->object;
|
|
array->elementSize = elementSize;
|
|
array->arraySize = arraySize;
|
|
array->elementCount = elementCount;
|
|
array->totalUses = 0;
|
|
array->isSigned = 1;
|
|
array->isFloat = IS_TYPE_FLOAT(TPTR_TARGET(list->object->type));
|
|
array->invalid = 0;
|
|
array->isSigned = 1;
|
|
if (!array->isFloat && is_unsigned(TPTR_TARGET(list->object->type)))
|
|
array->isSigned = 0;
|
|
|
|
for (i = 0; i < elementCount; i++) {
|
|
array->elements[i] = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return head;
|
|
}
|
|
|
|
static LocalArray *lookup_array_object(LocalArray *arrays, Object *object) {
|
|
while (arrays) {
|
|
if (arrays->object == object)
|
|
return arrays;
|
|
arrays = arrays->next;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
void changearraytoregisters(void) {
|
|
LocalArray *arrays;
|
|
PCodeBlock *block;
|
|
PCode *instr;
|
|
int i;
|
|
PCodeArg *op;
|
|
LocalArray **ptr;
|
|
LocalArray *array;
|
|
int intCounter;
|
|
int floatCounter;
|
|
int reg;
|
|
int reg2;
|
|
PCode *newInstr;
|
|
|
|
arrays = NULL;
|
|
if ((arrays = scanforlocalarrays())) {
|
|
for (block = pcbasicblocks; block; block = block->nextBlock) {
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (!(instr->flags & fPCodeFlag1) && instr->argCount) {
|
|
op = instr->args;
|
|
i = instr->argCount;
|
|
while (i--) {
|
|
if (
|
|
op->kind == PCOp_MEMORY &&
|
|
(PCOpMemoryArg) op->arg == PCOpMemory1 &&
|
|
(array = lookup_array_object(arrays, op->data.mem.obj)) &&
|
|
!array->invalid
|
|
) {
|
|
if (
|
|
(instr->flags & (fPCodeFlag2 | fPCodeFlag4)) &&
|
|
(op->data.mem.offset % array->elementSize) == 0 &&
|
|
op->data.mem.offset < array->arraySize
|
|
) {
|
|
switch (instr->op) {
|
|
case PC_LBZ:
|
|
case PC_STB:
|
|
if (array->elementSize != 1)
|
|
array->invalid = 1;
|
|
break;
|
|
case PC_LHZ:
|
|
case PC_LHA:
|
|
case PC_STH:
|
|
if (array->elementSize != 2)
|
|
array->invalid = 1;
|
|
break;
|
|
case PC_LWZ:
|
|
case PC_STW:
|
|
if (array->elementSize != 4)
|
|
array->invalid = 1;
|
|
break;
|
|
case PC_LFD:
|
|
case PC_STFD:
|
|
if (array->elementSize != 8)
|
|
array->invalid = 1;
|
|
break;
|
|
default:
|
|
array->invalid = 1;
|
|
break;
|
|
}
|
|
|
|
if (!array->invalid)
|
|
array->elements[op->data.mem.offset / array->elementSize]++;
|
|
} else {
|
|
array->invalid = 1;
|
|
}
|
|
}
|
|
op++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
intCounter = 0;
|
|
floatCounter = 0;
|
|
ptr = &arrays;
|
|
array = *ptr;
|
|
while (array) {
|
|
if (array->invalid) {
|
|
*ptr = array = array->next;
|
|
continue;
|
|
}
|
|
|
|
if (array->isFloat)
|
|
floatCounter += array->elementCount;
|
|
if (!array->isFloat)
|
|
intCounter += array->elementCount;
|
|
|
|
for (i = 0; i < array->elementCount; i++)
|
|
array->totalUses += array->elements[i];
|
|
|
|
array = array->next;
|
|
}
|
|
|
|
if (arrays) {
|
|
while (intCounter > 8) {
|
|
LocalArray *best;
|
|
int score;
|
|
score = 0;
|
|
best = NULL;
|
|
for (array = arrays; array; array = array->next) {
|
|
if (!array->isFloat) {
|
|
if (best) {
|
|
if (array->totalUses < score) {
|
|
score = array->totalUses;
|
|
best = array;
|
|
}
|
|
} else {
|
|
best = array;
|
|
score = array->totalUses;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!best)
|
|
break;
|
|
|
|
if (best == arrays) {
|
|
arrays = best->next;
|
|
} else {
|
|
for (array = arrays; array; array = array->next) {
|
|
if (array->next == best) {
|
|
array->next = best->next;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
intCounter -= best->elementCount;
|
|
}
|
|
|
|
while (floatCounter > 8) {
|
|
LocalArray *best;
|
|
int score;
|
|
score = 0;
|
|
best = NULL;
|
|
for (array = arrays; array; array = array->next) {
|
|
if (array->isFloat) {
|
|
if (best) {
|
|
if (array->totalUses < score) {
|
|
score = array->totalUses;
|
|
best = array;
|
|
}
|
|
} else {
|
|
best = array;
|
|
score = array->totalUses;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!best)
|
|
break;
|
|
|
|
if (best == arrays) {
|
|
arrays = best->next;
|
|
} else {
|
|
for (array = arrays; array; array = array->next) {
|
|
if (array->next == best) {
|
|
array->next = best->next;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
floatCounter -= best->elementCount;
|
|
}
|
|
|
|
#line 2394
|
|
CError_ASSERT(intCounter <= 8 && floatCounter <= 8);
|
|
|
|
if (!arrays)
|
|
return;
|
|
|
|
optimizedloop_trans_regs = 1;
|
|
|
|
for (array = arrays; array; array = array->next) {
|
|
for (i = 0; i < array->elementCount; i++) {
|
|
if (array->isFloat)
|
|
array->elements[i] = used_virtual_registers[RegClass_FPR]++;
|
|
else
|
|
array->elements[i] = used_virtual_registers[RegClass_GPR]++;
|
|
}
|
|
}
|
|
|
|
for (block = pcbasicblocks; block; block = block->nextBlock) {
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (
|
|
!(instr->flags & fPCodeFlag1) &&
|
|
instr->argCount &&
|
|
(instr->flags & (fPCodeFlag2 | fPCodeFlag4)) &&
|
|
instr->args[2].kind == PCOp_MEMORY &&
|
|
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
|
|
(array = lookup_array_object(arrays, instr->args[2].data.mem.obj)) &&
|
|
!(array->invalid)
|
|
)
|
|
{
|
|
reg2 = instr->args[0].data.reg.reg;
|
|
reg = array->elements[instr->args[2].data.mem.offset / array->elementSize];
|
|
newInstr = NULL;
|
|
switch (instr->op) {
|
|
case PC_LBZ:
|
|
if (array->isSigned)
|
|
newInstr = makepcode(PC_MR, reg2, reg);
|
|
else
|
|
newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 24, 31);
|
|
break;
|
|
case PC_STB:
|
|
if (array->isSigned)
|
|
newInstr = makepcode(PC_EXTSB, reg, reg2);
|
|
else
|
|
newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 24, 31);
|
|
break;
|
|
case PC_LHZ:
|
|
newInstr = makepcode(PC_RLWINM, reg2, reg, 0, 16, 31);
|
|
break;
|
|
case PC_LHA:
|
|
newInstr = makepcode(PC_EXTSH, reg2, reg);
|
|
break;
|
|
case PC_STH:
|
|
if (array->isSigned)
|
|
newInstr = makepcode(PC_EXTSH, reg, reg2);
|
|
else
|
|
newInstr = makepcode(PC_RLWINM, reg, reg2, 0, 16, 31);
|
|
break;
|
|
case PC_LWZ:
|
|
newInstr = makepcode(PC_MR, reg2, reg);
|
|
break;
|
|
case PC_STW:
|
|
newInstr = makepcode(PC_MR, reg, reg2);
|
|
break;
|
|
case PC_LFD:
|
|
newInstr = makepcode(PC_FMR, reg2, reg);
|
|
break;
|
|
case PC_STFD:
|
|
newInstr = makepcode(PC_FMR, reg, reg2);
|
|
break;
|
|
default:
|
|
#line 2494
|
|
CError_FATAL();
|
|
break;
|
|
}
|
|
|
|
if (newInstr) {
|
|
insertpcodebefore(instr, newInstr);
|
|
deletepcode(instr);
|
|
instr = newInstr;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
freeoheap();
|
|
}
|
|
|
|
static void optimizenestedloops(Loop *loop) {
|
|
while (loop) {
|
|
if (loop->children)
|
|
optimizenestedloops(loop->children);
|
|
optimizeloop(loop);
|
|
loop = loop->nextSibling;
|
|
}
|
|
}
|
|
|
|
void optimizeloops(void) {
|
|
optimizedloops = 0;
|
|
optimizedloop_full_unroll = 0;
|
|
optimizedloop_trans_regs = 0;
|
|
if (loopsinflowgraph) {
|
|
computeusedefchains(0);
|
|
last_virtual_GPR = used_virtual_registers[RegClass_GPR];
|
|
liveonexit = oalloc(4 * ((used_virtual_registers[RegClass_GPR] + 31) >> 5));
|
|
inductionvariables = oalloc(4 * ((last_virtual_GPR + 31) >> 5));
|
|
optimizenestedloops(loopsinflowgraph);
|
|
freeoheap();
|
|
}
|
|
}
|
|
|