MWCC/compiler_and_linker/FrontEnd/Optimizer/IroUnrollLoop.c

2306 lines
74 KiB
C

#include "IroUnrollLoop.h"
#include "compiler/CError.h"
#include "IroFlowgraph.h"
#include "IroLinearForm.h"
#include "IroUtil.h"
#include "compiler/LoopDetection.h"
#include "IroLoop.h"
#include "IroDump.h"
#include "IroVars.h"
#include "compiler/CFunc.h"
#include "compiler/CMachine.h"
#ifdef __MWERKS__
#pragma options align=mac68k
#endif
// One loop found by IRO_FindLoops_Unroll; entries are chained through `next`
// starting at LoopList_First.
typedef struct LoopList {
// Bit 0 is set when the entry is created and cleared when
// Bv_IsSubset(other->bv, this->bv) holds for some other entry; only entries
// whose bit 0 is still set are passed to LoopUnroll.
UInt8 flags;
// Copy of the InLoop bit vector taken when the loop was recorded.
BitVector *bv;
// Next entry in the list (new entries are linked in at the head).
struct LoopList *next;
// Flow-graph node that heads the loop.
IRONode *fnode;
// Set to 0 on creation; not read in the code visible in this part of the file.
int xE;
} LoopList;
#ifdef __MWERKS__
#pragma options align=reset
#endif
// Forward declarations of file-local (static) helpers that are referenced
// before their definitions appear.
static void IRO_FindLoops_Unroll(void);
static void LoopUnroll(int count, IRONode *fnode);
static int IsLoopUnrollable(IROLoop *loop);
static int IsDifferenceOfTermsConstant(IROAddrRecord *lowerRec, IROAddrRecord *upperRec, int isUnsigned, CInt64 *pval);
// The *_DoWhile builders below are the ones UnrollWhileLoop calls.
static IROLinear *BuildOrigIterationCount_DoWhile(IROList *list, IROLoop *loop);
static IROLinear *BuildPreAlignTemp(IROLoopInd *ind, UInt32 unrollFactor, IROList *list);
static IROLinear *BuildNewFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop);
static IROLinear *BuildUnrolledFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop);
// Public entry point of the unrolling phase.
// Sets VectorPhaseCalledFromUnroll to 1, runs the loop finder/unroller and
// finally calls IRO_CheckForUserBreak().
void IRO_LoopUnroller(void) {
    VectorPhaseCalledFromUnroll = 1;

    IRO_FindLoops_Unroll();

    IRO_CheckForUserBreak();
}
// Finds the loops of the current flow graph and calls LoopUnroll on each one
// that is still marked after the subset pass.
//
// Phase 1: for every flow-graph node, each predecessor `pred` for which
//          Bv_IsBitSet(fnode->index, pred->dom) holds marks the node as a loop
//          header. InLoop is filled with the header, those predecessors and
//          whatever AddPreds() adds, and a LoopList entry holding a copy of
//          InLoop is linked in at the head of LoopList_First.
// Phase 2: for every ordered pair of distinct entries (list, other), bit 0 of
//          other->flags is cleared when Bv_IsSubset(list->bv, other->bv)
//          holds.
// Phase 3: every entry whose bit 0 is still set is restored into InLoop and
//          passed to LoopUnroll with copts.unrollfactor.
static void IRO_FindLoops_Unroll(void) {
    IRONode *fnode;
    IRONode *pred;
    UInt16 i;
    UInt16 isHeader;
    LoopList *list;
    LoopList *other;

    LoopList_First = NULL;

    for (fnode = IRO_FirstNode; fnode; fnode = fnode->nextnode) {
        isHeader = 0;

        for (i = 0; i < fnode->numpred; i++) {
            pred = IRO_NodeTable[fnode->pred[i]];
            if (Bv_IsBitSet(fnode->index, pred->dom)) {
                if (!isHeader) {
                    // First qualifying predecessor of this header: start a fresh node set.
                    Bv_AllocVector(&InLoop, IRO_NumNodes + 1);
                    Bv_Clear(InLoop);
                    Bv_SetBit(fnode->index, InLoop);
                }
                isHeader = 1;
                Bv_SetBit(pred->index, InLoop);
                if (pred != fnode)
                    AddPreds(pred);
            }
        }

        if (isHeader) {
            // Push at the head; LoopList_First is NULL for the first entry,
            // which gives the same `next` the separate empty-list branch used to set.
            list = oalloc(sizeof(LoopList));
            list->next = LoopList_First;
            LoopList_First = list;

            Bv_AllocVector(&list->bv, IRO_NumNodes + 1);
            // Assign rather than OR: the object was just allocated and nothing
            // has initialised `flags` yet (other oalloc results in this file
            // are memset explicitly), so `|=` would read an indeterminate byte.
            list->flags = 1;
            Bv_Copy(InLoop, list->bv);
            list->fnode = fnode;
            list->xE = 0;
        }
    }

    Bv_AllocVector(&LoopTemp, IRO_NumNodes + 1);

    for (list = LoopList_First; list; list = list->next) {
        for (other = LoopList_First; other; other = other->next) {
            if (other == list)
                continue;

            IRO_Dump(" header = %d \n", other->fnode->index);
            IRO_Dump(" l1 bit vector=\n");
            IRO_DumpBits("", other->bv);
            IRO_Dump(" l bit vector=\n");
            IRO_DumpBits("", list->bv);

            if (Bv_IsSubset(list->bv, other->bv))
                other->flags &= ~1;
        }
    }

    for (list = LoopList_First; list; list = list->next) {
        if (list->flags & 1) {
            IRONode *listfnode;

            // LoopUnroll and its helpers read the loop membership from the global InLoop.
            Bv_Copy(list->bv, InLoop);
            listfnode = list->fnode;
            IRO_Dump("IRO_FindLoops_Unroll:Found loop with header %d\n", listfnode->index);
            IRO_DumpBits("Loop includes: ", InLoop);
            LoopUnroll(copts.unrollfactor, listfnode);
            IRO_UpdateFlagsOnInts();
        }
    }
}
// ORs together the values (b << 0), (b << 1), ..., (b << (a - 1)).
// Returns 0, without touching *result, as soon as one shifted value has a bit
// in common with the bits accumulated so far. Otherwise stores the accumulated
// value in *result and returns 1.
static int CheckConstant(CInt64 a, CInt64 b, CInt64 *result) {
    CInt64 accum = cint64_zero;
    CInt64 shifted;
    CInt64 pos = cint64_zero;

    while (CInt64_Less(pos, a)) {
        shifted = CInt64_Shl(b, pos);
        if (CInt64_NotEqual(CInt64_And(shifted, accum), cint64_zero))
            return 0;
        accum = CInt64_Or(shifted, accum);
        pos = CInt64_Add(pos, cint64_one);
    }

    *result = accum;
    return 1;
}
// Pieces of the statement shape recognised by PatternMatchLoop and consumed by
// UnrollWhileLoopBody. The statement is `*(nd4 + nd0) op= nd10` with op one of
// EORASS / EXORASS / EANDASS.
typedef struct LoopPattern {
// Right operand of the address EADD; matched as `x << constant` (ESHL).
IROLinear *nd0;
// Left operand of the address EADD; must satisfy IRO_IsVariable.
IROLinear *nd4;
// rtype of the address EADD node.
Type *type;
// The ESHL node `constant << y` found inside the assignment's right operand
// (possibly below an ETYPCON and/or EBINNOT).
IROLinear *ndC;
// Right operand of the assignment statement itself.
IROLinear *nd10;
// Combined constant produced by CheckConstant; complemented with CInt64_Not
// when the statement is an EANDASS.
CInt64 val14;
// Constant shift count of nd0 (the right operand of its ESHL).
CInt64 val1C;
} LoopPattern;
// Emits the replacement statements for the pattern captured in `pattern`.
//
// For pass = 0..7 the flow-graph nodes from fnode3 up to (not including)
// header are walked. Every linear node that lies outside the index range
// [loop->index20, loop->index24], is not a label or nop and is not flagged
// IROLF_Reffed is asserted to be an EORASS/EANDASS/EXORASS and yields a new
// statement
//     *((nd4 + pass * (1 << val1C)) + nd0)  op  val14
// (all names are fields of `pattern`), where `op` is the original operator or
// EASS in the two cases handled below. Each per-node list is handed to
// IRO_Paste together with fnode2->last.
//
// NOTE(review): unrollFactor is never read here; the number of passes is the
// literal 8 in the loop condition.
static void UnrollWhileLoopBody(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, LoopPattern *pattern, UInt32 unrollFactor) {
IRONode *scan;
int pass;
IROLinear *firstnode;
IROLinear *lastnd;
IROLinear *nd;
IROLinear *nd1;
IROLinear *nd2;
IROLinear *nd3;
IROLinear *nd4;
IROLinear *nd5;
IROLinear *nd6;
IROLinear *nd8;
IROLinear *nd7;
ENode *expr;
IROList list;
CInt64 zero;
CInt64 shiftval;
// `zero` is initialised here but not read anywhere below.
CInt64_SetLong(&zero, 0);
pass = 0;
do {
// `firstnode` is assigned below but never read in this function.
firstnode = NULL;
for (scan = fnode3; scan && scan != header; scan = scan->nextnode) {
IRO_InitList(&list);
lastnd = scan->last;
nd = scan->first;
while (1) {
if (nd->stmt)
nd->stmt->flags |= StmtFlag_10;
if (
(nd->index < loop->index20 || nd->index > loop->index24) &&
nd->type != IROLinearLabel &&
nd->type != IROLinearNop &&
!(nd->flags & IROLF_Reffed)
)
{
CError_ASSERT(345, nd->nodetype == EORASS || nd->nodetype == EANDASS || nd->nodetype == EXORASS);
// nd1: copy of the shifted index expression (pattern->nd0).
IRO_DuplicateExpr(pattern->nd0, &list);
nd1 = list.tail;
// nd2: integer constant pass * (1 << val1C).
shiftval = cint64_one;
shiftval = CInt64_Shl(shiftval, pattern->val1C);
nd2 = IRO_NewLinear(IROLinearOperand);
nd2->index = ++IRO_NumLinear;
nd2->rtype = pattern->nd0->rtype;
expr = IRO_NewENode(EINTCONST);
expr->rtype = pattern->nd0->rtype;
CInt64_SetLong(&expr->data.intval, pass * CInt64_GetULong(&shiftval));
nd2->u.node = expr;
IRO_AddToList(nd2, &list);
// nd3: (copy of pattern->nd4) + nd2.
IRO_DuplicateExpr(pattern->nd4, &list);
nd3 = IRO_NewLinear(IROLinearOp2Arg);
nd3->index = ++IRO_NumLinear;
nd3->nodetype = EADD;
nd3->rtype = pattern->type;
nd3->u.diadic.left = list.tail;
nd3->u.diadic.right = nd2;
IRO_AddToList(nd3, &list);
// nd4: nd3 + nd1, the full address.
nd4 = IRO_NewLinear(IROLinearOp2Arg);
nd4->index = ++IRO_NumLinear;
nd4->nodetype = EADD;
nd4->rtype = pattern->type;
nd4->u.diadic.left = nd3;
nd4->u.diadic.right = nd1;
IRO_AddToList(nd4, &list);
// nd5: dereference of that address, typed like the original statement.
nd5 = IRO_NewLinear(IROLinearOp1Arg);
nd5->index = ++IRO_NumLinear;
nd5->nodetype = EINDIRECT;
nd5->rtype = nd->rtype;
nd5->u.monadic = nd4;
IRO_AddToList(nd5, &list);
// nd6: copy of the original assignment with nd5 as its left operand.
nd6 = IRO_NewLinear(IROLinearOp2Arg);
*nd6 = *nd;
nd6->index = ++IRO_NumLinear;
nd6->u.diadic.left = list.tail;
nd6->next = NULL;
// nd7: integer constant holding pattern->val14.
nd7 = IRO_NewLinear(IROLinearOperand);
nd7->index = ++IRO_NumLinear;
nd7->rtype = pattern->ndC->rtype;
expr = IRO_NewENode(EINTCONST);
expr->rtype = pattern->ndC->rtype;
nd7->u.node = expr;
nd7->next = NULL;
expr->data.intval = pattern->val14;
// `x &= 0` and `x |= all-ones` (for operand sizes 1, 2 and 4) do not depend
// on the old value, so they are turned into plain assignments.
if (
IS_LINEAR_DIADIC(nd, EANDASS) &&
CInt64_Equal(pattern->val14, cint64_zero)
)
{
nd6->nodetype = EASS;
} else if (
IS_LINEAR_DIADIC(nd, EORASS) &&
!CTool_EndianReadWord32(&pattern->val14.hi)
)
{
UInt32 tmp = CInt64_GetULong(&pattern->val14);
if (
(nd->rtype->size == 1 && tmp == 0xFF) ||
(nd->rtype->size == 2 && tmp == 0xFFFF) ||
(nd->rtype->size == 4 && tmp == 0xFFFFFFFF)
)
{
nd6->nodetype = EASS;
}
}
IRO_AddToList(nd7, &list);
// If the original right operand was an ETYPCON, wrap the constant in a copy of it.
if (IS_LINEAR_MONADIC(pattern->nd10, ETYPCON)) {
nd8 = IRO_NewLinear(IROLinearOp1Arg);
*nd8 = *pattern->nd10;
nd8->index = ++IRO_NumLinear;
nd8->u.monadic = nd7;
nd8->next = NULL;
IRO_AddToList(nd8, &list);
} else {
nd8 = nd7;
}
nd6->u.diadic.right = nd8;
IRO_AddToList(nd6, &list);
if (!firstnode)
firstnode = list.head;
}
if (nd == lastnd)
break;
nd = nd->next;
}
if (list.head && list.tail)
IRO_Paste(list.head, list.tail, fnode2->last);
}
} while (++pass < 8);
}
static int PatternMatchLoop(IRONode *fnode, IROLoop *loop, IROLoopInd *ind, UInt32 *unrollFactor, SInt32 *result1, SInt32 *result2, LoopPattern *pattern) {
IROLinear *scan;
IROLinear *varnode;
IROLinear *nd1;
IROLinear *nd2;
IROLinear *left1;
IROLinear *left2;
IROLinear *right1;
IROLinear *right2;
Object *obj1;
Object *obj2;
CInt64 shl;
CInt64 val;
*result1 = 0;
*result2 = 0;
if ((scan = fnode->first)) {
while (1) {
if (
(scan->index < loop->index20 || scan->index > loop->index24) &&
!(scan->flags & IROLF_Reffed) &&
scan->type != IROLinearNop &&
scan->type != IROLinearLabel
)
{
if (IS_LINEAR_DIADIC_3(scan, EORASS, EXORASS, EANDASS)) {
(*result2)++;
if (IS_LINEAR_MONADIC(scan->u.diadic.left, EINDIRECT)) {
varnode = scan->u.diadic.left->u.monadic;
if (IS_LINEAR_DIADIC(varnode, EADD)) {
pattern->nd4 = varnode->u.diadic.left;
pattern->type = varnode->rtype;
if (IRO_IsVariable(varnode->u.diadic.left)) {
pattern->nd0 = varnode->u.diadic.right;
if (
IS_LINEAR_DIADIC(pattern->nd0, ESHL) &&
IRO_IsConstant(pattern->nd0->u.diadic.right)
)
{
pattern->val1C = pattern->nd0->u.diadic.right->u.node->data.intval;
nd1 = pattern->nd0->u.diadic.left;
} else {
return 0;
}
} else {
return 0;
}
} else {
return 0;
}
} else {
return 0;
}
pattern->nd10 = scan->u.diadic.right;
if (IS_LINEAR_MONADIC(pattern->nd10, ETYPCON)) {
if (IS_LINEAR_DIADIC(scan, EANDASS)) {
if (IS_LINEAR_MONADIC(pattern->nd10->u.monadic, EBINNOT)) {
pattern->ndC = pattern->nd10->u.monadic->u.monadic;
} else {
return 0;
}
} else {
pattern->ndC = pattern->nd10->u.monadic;
}
if (IS_LINEAR_DIADIC(pattern->ndC, ESHL) && IRO_IsConstant(pattern->ndC->u.diadic.left)) {
val = pattern->ndC->u.diadic.left->u.node->data.intval;
nd2 = pattern->ndC->u.diadic.right;
} else {
return 0;
}
} else if (IS_LINEAR_DIADIC(pattern->nd10, ESHL) && IS_LINEAR_DIADIC_2(scan, EORASS, EXORASS)) {
pattern->ndC = pattern->nd10;
if (IRO_IsConstant(pattern->ndC->u.diadic.left)) {
val = pattern->ndC->u.diadic.left->u.node->data.intval;
nd2 = pattern->ndC->u.diadic.right;
} else {
return 0;
}
} else if (IS_LINEAR_MONADIC(pattern->nd10, EBINNOT) && IS_LINEAR_DIADIC(scan, EANDASS)) {
pattern->ndC = pattern->nd10->u.monadic;
if (IS_LINEAR_DIADIC(pattern->ndC, ESHL) && IRO_IsConstant(pattern->ndC->u.diadic.left)) {
val = pattern->ndC->u.diadic.left->u.node->data.intval;
nd2 = pattern->ndC->u.diadic.right;
} else {
return 0;
}
} else {
return 0;
}
if (IS_LINEAR_DIADIC(nd2, EAND) && IS_LINEAR_DIADIC(nd1, ESHR)) {
left1 = nd1->u.diadic.left;
left2 = nd2->u.diadic.left;
obj1 = IRO_IsVariable(left1);
obj2 = IRO_IsVariable(left2);
if (obj1 == obj2 && obj1 == ind->var->object) {
right1 = nd1->u.diadic.right;
right2 = nd2->u.diadic.right;
if (IRO_IsConstant(right1) && IRO_IsConstant(right2)) {
shl = cint64_one;
shl = CInt64_Shl(shl, right1->u.node->data.intval);
shl = CInt64_Sub(shl, cint64_one);
if (CInt64_Equal(shl, right2->u.node->data.intval)) {
if (CTool_EndianReadWord32(&shl.hi) == 0) {
*unrollFactor = CInt64_GetULong(&shl) + 1;
if (CheckConstant(CInt64_Add(shl, cint64_one), val, &pattern->val14)) {
(*result1)++;
if (IS_LINEAR_DIADIC(scan, EANDASS))
pattern->val14 = CInt64_Not(pattern->val14);
}
} else {
return 0;
}
} else {
return 0;
}
} else {
return 0;
}
} else {
return 0;
}
} else {
return 0;
}
} else {
return 0;
}
}
if (scan == fnode->last)
break;
scan = scan->next;
}
}
return 1;
}
// Unrolls a loop that carries LoopFlags_10000 (see UnrollStandardLoop) and
// whose body matches the bit-update pattern checked by PatternMatchLoop.
//
// header  - loop header node (LoopUnroll passes the same node it stores in LoopNode)
// fnode2  - the predecessor outside the loop; new code is pasted at fnode2->last
// fnode3  - first node of the loop body that gets copied
// loop    - loop description; induction/index20/index24 are filled in here
// unrollFactor - overwritten by PatternMatchLoop through its pointer argument
//
// Returns 0 when the loop is rejected and 1 after the transformation.
static UInt32 UnrollWhileLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, UInt32 unrollFactor) {
IROLoopInd *ind;
IRONode *scan;
CLabel *lastlabel;
IROLinear *lastlabelnode;
IROLinear *earlyLoopExitTest;
CLabel *earlyLoopExitTestLabel;
IROLinear *origIterationCount;
IROLinear *unrolledFinalValue;
IROLinear *preAlignTemp;
IROLinear *newFinalValue;
IROLinear *savedHead60;
IROLinear *unrolledBodyEntryTest;
CLabel *label;
IROLinear *savedHead2;
IROLinear *loophead25;
IROLinear *loopend;
IROLinear *loopscan;
IROLinear *indvar;
IROLinear *less;
IROLinear *loopExitTest;
IROLinear *saveTail;
CLabel *label2;
IROLinear *gotond;
CLabel *label3;
IROLinear *savedHead3;
IROLinear *updIndInc;
IROLinear *label2nd;
IROLinear *less2;
IROLinear *saveTail2;
IROLinear *less3;
IROLinear *wtf;
IROLinear *constnd;
IROLinear *ass;
IROLinear *nd18;
IRONode *fn19;
IRONode *newfnode1;
IRONode *newfnode2;
IRONode *newfnode3;
IRONode *newfnode4;
IRONode *newfnode5;
IRONode *newfnode6;
IRONode *newfnode7;
IRONode *newfnode8;
IROLinear *lastnd;
ENode *expr;
SInt32 result1;
SInt32 result2;
LoopPattern pattern;
IROList list;
IRO_Dump("while(n--) loop \n");
// --- Rejection tests on the loop flags ---
if (loop->flags & LoopFlags_800) {
IRO_Dump("loop not unrolled because induction used in loop \n");
return 0;
}
if (loop->flags & LoopFlags_1000) {
IRO_Dump("loop not unrolled because loop has multiple exits \n");
return 0;
}
if (!(loop->flags & LP_HAS_MULTIPLE_INDUCTIONS))
return 0;
// --- Pick the first induction flagged with both LoopInd_HasMod and LoopInd_HasDiv ---
for (ind = FirstInd; ind; ind = ind->next) {
if ((ind->flags & LoopInd_HasMod) && (ind->flags & LoopInd_HasDiv))
break;
}
if (!ind) {
IRO_Dump("Could not find loop with and induction with MOD and DIV operation\n");
return 0;
}
// --- The induction must be unsigned and, for the two-operand update forms
//     (`x += const`, `x = y + const`), have addConst == 1. One-operand
//     updates are accepted only as EPREINC / EPOSTINC. ---
if (!IRO_IsUnsignedType(ind->nd->rtype))
return 0;
if (ind->nd->type == IROLinearOp2Arg) {
if (ind->nd->nodetype == EADDASS && IRO_IsConstant(ind->nd->u.diadic.right)) {
if (ind->addConst != 1)
return 0;
} else if (ind->nd->nodetype == EASS) {
if (
ind->nd->u.diadic.right->type != IROLinearOp2Arg ||
ind->nd->u.diadic.right->nodetype != EADD ||
!IRO_IsConstant(ind->nd->u.diadic.right->u.diadic.right)
)
return 0;
if (ind->addConst != 1)
return 0;
} else {
return 0;
}
} else if (ind->nd->type == IROLinearOp1Arg && ind->nd->nodetype != EPREINC && ind->nd->nodetype != EPOSTINC) {
return 0;
}
// Record the induction and the linear-index range covered by its update
// expression; nodes inside [index20, index24] are skipped by the matchers.
loop->induction = ind;
loop->index24 = ind->nd->index;
loop->index20 = IRO_FindStart(ind->nd)->index;
// --- Pattern-match every node of the loop except the header ---
// NOTE(review): PatternMatchLoop zeroes result1/result2 on every call, so the
// test after this loop only sees the counts of the last node scanned, and
// both variables are uninitialised if no node qualifies. Confirm intended.
scan = IRO_FirstNode;
memset(&pattern, 0, sizeof(pattern));
while (scan) {
if (Bv_IsBitSet(scan->index, InLoop) && scan != header) {
if (!PatternMatchLoop(scan, loop, ind, &unrollFactor, &result1, &result2, &pattern))
return 0;
}
scan = scan->nextnode;
}
if (result1 > 1 || result2 > 1)
return 0;
// --- Duplicate the header's condition (LoopNode->last->u.label.x4) at fnode2->last ---
lastlabel = fnode2->last->u.label.label;
lastlabelnode = IRO_FindLabelNode(fnode2->last->u.label.label, fnode2->last);
IRO_InitList(&list);
IRO_DuplicateExprRange(lastlabelnode->next, LoopNode->last->u.label.x4->u.diadic.left, &list);
IRO_DuplicateExpr(LoopNode->last->u.label.x4, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
lastlabelnode = list.tail;
// --- Early-exit branch on that duplicated condition, targeting a fresh label ---
IRO_InitList(&list);
earlyLoopExitTest = BuildEarlyLoopExitTest(LoopNode->last->type, &list);
earlyLoopExitTestLabel = IRO_NewLabel();
earlyLoopExitTest->u.label.label = earlyLoopExitTestLabel;
earlyLoopExitTest->u.label.x4 = lastlabelnode;
earlyLoopExitTest->u.label.x4->flags |= IROLF_Reffed;
earlyLoopExitTest->rtype = LoopNode->last->rtype;
IRO_Paste(list.head, list.tail, fnode2->last);
// --- Iteration count and the three bound values used by the tests below ---
IRO_InitList(&list);
origIterationCount = BuildOrigIterationCount_DoWhile(&list, loop);
IRO_Paste(list.head, list.tail, fnode2->last);
savedHead60 = list.head;
IRO_InitList(&list);
preAlignTemp = BuildPreAlignTemp(ind, unrollFactor, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
IRO_InitList(&list);
unrolledFinalValue = BuildUnrolledFinalvalue_DoWhile(origIterationCount, unrollFactor, &list, loop);
IRO_Paste(list.head, list.tail, fnode2->last);
IRO_InitList(&list);
newFinalValue = BuildNewFinalvalue_DoWhile(origIterationCount, unrollFactor, &list, loop);
IRO_Paste(list.head, list.tail, fnode2->last);
// --- Entry test for the unrolled body, branching to the original label ---
IRO_InitList(&list);
BuildUnrolledBodyEntryTest(&list, origIterationCount, unrollFactor, lastlabel);
IRO_Paste(list.head, list.tail, fnode2->last);
unrolledBodyEntryTest = list.tail;
// --- First emitted loop: `label:` followed by a plain copy of the body,
//     repeated while induction < preAlignTemp ---
IRO_InitList(&list);
label = BuildLabel(&list);
IRO_Paste(list.head, list.tail, fnode2->last);
savedHead2 = list.head;
// `loophead25` is assigned below but never read in this function.
loophead25 = NULL;
for (scan = fnode3; scan && scan != header; scan = scan->nextnode) {
IRO_InitList(&list);
loopend = scan->last;
loopscan = scan->first;
while (1) {
if (loopscan->stmt)
loopscan->stmt->flags |= StmtFlag_10;
if (loopscan->type != IROLinearLabel && !(loopscan->flags & IROLF_Reffed)) {
IRO_DuplicateExpr(loopscan, &list);
if (!loophead25)
loophead25 = list.head;
}
if (loopscan == loopend)
break;
loopscan = loopscan->next;
}
if (list.head && list.tail)
IRO_Paste(list.head, list.tail, fnode2->last);
}
// Test `induction < preAlignTemp`, branching back to `label`.
IRO_InitList(&list);
if (ind->nd->type == IROLinearOp1Arg)
IRO_DuplicateExpr(ind->nd->u.monadic, &list);
else
IRO_DuplicateExpr(ind->nd->u.diadic.left, &list);
list.tail->flags &= ~IROLF_Assigned;
indvar = list.tail;
IRO_DuplicateExpr(preAlignTemp, &list);
list.tail->flags &= ~IROLF_Assigned;
less = IRO_NewLinear(IROLinearOp2Arg);
less->nodetype = ELESS;
less->rtype = TYPE(&stbool);
less->index = ++IRO_NumLinear;
less->next = NULL;
less->u.diadic.left = indvar;
less->u.diadic.right = list.tail;
IRO_AddToList(less, &list);
less->flags |= IROLF_Reffed;
loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list);
loopExitTest->u.label.label = label;
loopExitTest->u.label.x4 = less;
loopExitTest->u.label.x4->flags |= IROLF_Reffed;
loopExitTest->rtype = LoopNode->last->rtype;
IRO_Paste(list.head, list.tail, fnode2->last);
saveTail = list.tail;
// --- `goto label2`, which jumps over the unrolled body to its exit test ---
IRO_InitList(&list);
label2 = IRO_NewLabel();
gotond = IRO_NewLinear(IROLinearOp1Arg);
gotond->index = ++IRO_NumLinear;
gotond->type = IROLinearGoto;
gotond->u.label.label = label2;
IRO_AddToList(gotond, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
// --- Second emitted loop: `label3:`, the pattern-rewritten body, and an
//     induction increment of 8 * unrollFactor (8 matches the pass count in
//     UnrollWhileLoopBody) ---
IRO_InitList(&list);
label3 = BuildLabel(&list);
IRO_Paste(list.head, list.tail, fnode2->last);
savedHead3 = list.head;
UnrollWhileLoopBody(header, fnode2, fnode3, loop, &pattern, unrollFactor);
updIndInc = UpdateInductionIncrement(loop, 8 * unrollFactor, fnode2->last);
// `label2:` node.
// NOTE(review): label nodes take their index from IRO_NumLinear++ (post-
// increment) while every other new node here uses ++IRO_NumLinear.
IRO_InitList(&list);
label2nd = IRO_NewLinear(IROLinearLabel);
label2nd->index = IRO_NumLinear++;
label2nd->u.label.label = label2;
label2nd->flags |= IROLF_1;
IRO_AddToList(label2nd, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
// Test `induction < unrolledFinalValue`, branching back to `label3`.
IRO_InitList(&list);
if (ind->nd->type == IROLinearOp1Arg)
IRO_DuplicateExpr(ind->nd->u.monadic, &list);
else
IRO_DuplicateExpr(ind->nd->u.diadic.left, &list);
list.tail->flags &= ~IROLF_Assigned;
indvar = list.tail;
IRO_DuplicateExpr(unrolledFinalValue, &list);
list.tail->flags &= ~IROLF_Assigned;
less2 = IRO_NewLinear(IROLinearOp2Arg);
less2->nodetype = ELESS;
less2->rtype = TYPE(&stbool);
less2->index = ++IRO_NumLinear;
less2->next = NULL;
less2->u.diadic.left = indvar;
less2->u.diadic.right = list.tail;
IRO_AddToList(less2, &list);
less2->flags |= IROLF_Reffed;
loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list);
loopExitTest->u.label.label = label3;
loopExitTest->u.label.x4 = less2;
loopExitTest->u.label.x4->flags |= IROLF_Reffed;
loopExitTest->rtype = LoopNode->last->rtype;
IRO_Paste(list.head, list.tail, fnode2->last);
saveTail2 = list.tail;
// --- Replace the original header condition by `induction < newFinalValue` ---
IRO_InitList(&list);
if (ind->nd->type == IROLinearOp1Arg)
IRO_DuplicateExpr(ind->nd->u.monadic, &list);
else
IRO_DuplicateExpr(ind->nd->u.diadic.left, &list);
list.tail->flags &= ~IROLF_Assigned;
indvar = list.tail;
IRO_DuplicateExpr(newFinalValue, &list);
list.tail->flags &= ~IROLF_Assigned;
less3 = IRO_NewLinear(IROLinearOp2Arg);
less3->nodetype = ELESS;
less3->rtype = TYPE(&stbool);
less3->index = ++IRO_NumLinear;
less3->next = NULL;
less3->u.diadic.left = indvar;
less3->u.diadic.right = list.tail;
IRO_AddToList(less3, &list);
less3->flags |= IROLF_Reffed;
// `wtf` keeps the old condition so its left operand can be reused below
// before the old condition is nopped out.
wtf = LoopNode->last->u.label.x4;
IRO_Paste(list.head, list.tail, LoopNode->last);
LoopNode->last->u.label.x4 = list.tail;
// --- Build `<copy of old condition's left operand> = 0`; it becomes
//     newfnode7, linked in right after LoopNode ---
IRO_InitList(&list);
constnd = IRO_NewLinear(IROLinearOperand);
constnd->index = ++IRO_NumLinear;
expr = IRO_NewENode(EINTCONST);
expr->rtype = wtf->u.diadic.left->rtype;
expr->data.intval = cint64_zero;
constnd->u.node = expr;
constnd->rtype = expr->rtype;
IRO_AddToList(constnd, &list);
constnd->flags |= IROLF_Reffed;
IRO_DuplicateExpr(wtf->u.diadic.left, &list);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->nodetype = EASS;
ass->rtype = list.tail->rtype;
ass->index = ++IRO_NumLinear;
ass->next = NULL;
ass->u.diadic.left = list.tail;
ass->u.diadic.right = constnd;
IRO_AddToList(ass, &list);
ass->flags |= IROLF_Assigned;
IRO_NopOut(wtf);
// --- Split the code pasted at fnode2 into flow-graph nodes, chained as
//     fnode2 -> newfnode1 .. newfnode6 -> (old fnode2->nextnode) ---
fn19 = fnode2->nextnode;
nd18 = fnode2->last;
// fnode2 now ends at the early-exit branch.
fnode2->last = earlyLoopExitTest;
// newfnode1: iteration-count code through the unrolled-body entry test.
newfnode1 = IRO_NewFlowGraphNode();
newfnode1->first = savedHead60;
newfnode1->last = unrolledBodyEntryTest;
fnode2->nextnode = newfnode1;
// newfnode2: `label:` loop (plain body copy and its back-branch).
newfnode2 = IRO_NewFlowGraphNode();
newfnode2->first = savedHead2;
newfnode2->last = saveTail;
savedHead2->u.label.label->stmt = (Statement *) newfnode2;
newfnode1->nextnode = newfnode2;
// newfnode3: the lone `goto label2`.
newfnode3 = IRO_NewFlowGraphNode();
newfnode3->first = gotond;
newfnode3->last = gotond;
newfnode2->nextnode = newfnode3;
// newfnode4: `label3:` through the induction increment.
newfnode4 = IRO_NewFlowGraphNode();
newfnode4->first = savedHead3;
newfnode4->last = updIndInc;
savedHead3->u.label.label->stmt = (Statement *) newfnode4;
newfnode3->nextnode = newfnode4;
// newfnode5: `label2:` through the branch back to label3.
newfnode5 = IRO_NewFlowGraphNode();
newfnode5->first = label2nd;
newfnode5->last = saveTail2;
label2nd->u.label.label->stmt = (Statement *) newfnode5;
newfnode4->nextnode = newfnode5;
// newfnode6: the node that used to be fnode2->last.
newfnode6 = IRO_NewFlowGraphNode();
newfnode6->first = nd18;
newfnode6->last = nd18;
newfnode5->nextnode = newfnode6;
newfnode6->nextnode = fn19;
// --- newfnode7 (the `= 0` assignment) and newfnode8 (the early-exit label)
//     are created by hand and spliced, in that order, after LoopNode in both
//     the linear list and the flow-graph node chain ---
newfnode7 = oalloc(sizeof(IRONode));
memset(newfnode7, 0, sizeof(IRONode));
newfnode7->index = IRO_NumNodes;
IRO_NumNodes++;
newfnode7->first = list.head;
newfnode7->last = list.tail;
list.tail->next = LoopNode->last->next;
LoopNode->last->next = list.head;
newfnode7->nextnode = LoopNode->nextnode;
LoopNode->nextnode = newfnode7;
newfnode8 = oalloc(sizeof(IRONode));
memset(newfnode8, 0, sizeof(IRONode));
newfnode8->index = IRO_NumNodes;
IRO_NumNodes++;
lastnd = IRO_NewLinear(IROLinearLabel);
lastnd->index = IRO_NumLinear++;
lastnd->next = NULL;
lastnd->u.label.label = earlyLoopExitTestLabel;
lastnd->flags |= IROLF_1;
earlyLoopExitTestLabel->stmt = (Statement *) newfnode8;
newfnode8->first = lastnd;
newfnode8->last = lastnd;
lastnd->next = newfnode7->last->next;
newfnode7->last->next = lastnd;
newfnode8->nextnode = newfnode7->nextnode;
newfnode7->nextnode = newfnode8;
return 1;
}
// Emits one copy of a loop body.
//
// Walks the flow-graph nodes from `start` up to (not including) `end`. In each
// node, every linear node with a statement gets StmtFlag_10 set on it, and
// every linear node that lies outside [loop->index20, loop->index24], is not a
// label and is not flagged IROLF_Reffed is duplicated. The copies made for one
// flow-graph node are handed to IRO_Paste together with `destnode`.
//
// When funkyFlag is non-zero, *val is advanced by loop->induction->addConst
// and ChangeInductionReference is applied to the span from the head of the
// first non-empty copy list to the tail of the last one.
// The addConst parameter is not read.
void IRO_IterateForLoopBody(IRONode *start, IRONode *end, IROLoop *loop, IROLinear *destnode, SInt32 addConst, CInt64 *val, Boolean funkyFlag) {
    IROLinear *firstCopy = NULL;
    IROLinear *lastCopy = NULL;
    IRONode *block;
    IROLinear *cur;
    IROLinear *stop;
    IROList copies;

    block = start;
    while (block && block != end) {
        IRO_InitList(&copies);
        stop = block->last;

        // Visits block->first through block->last inclusive.
        for (cur = block->first; ; cur = cur->next) {
            if (cur->stmt)
                cur->stmt->flags |= StmtFlag_10;

            if (
                cur->type != IROLinearLabel &&
                !(cur->flags & IROLF_Reffed) &&
                (cur->index < loop->index20 || cur->index > loop->index24)
            )
            {
                IRO_DuplicateExpr(cur, &copies);
                if (!firstCopy)
                    firstCopy = copies.head;
                lastCopy = copies.tail;
            }

            if (cur == stop)
                break;
        }

        if (copies.head && copies.tail)
            IRO_Paste(copies.head, copies.tail, destnode);

        block = block->nextnode;
    }

    if (!funkyFlag)
        return;

    *val = CInt64_Add(*val, IRO_MakeLong(loop->induction->addConst));
    ChangeInductionReference(firstCopy, lastCopy, *val, loop);
}
// Creates a new flow-graph node holding a single nop, links it directly after
// fnode3 (in both the flow-graph chain and the linear list), then emits
// unrollFactor copies of the body spanning fnode2 up to fnode1 at that nop
// and finishes with UpdateInductionIncrement(loop, unrollFactor, ...).
void IRO_LinearizeForLoopPostLoop(IRONode *fnode1, IRONode *fnode2, IROLoop *loop, IRONode *fnode3, UInt32 unrollFactor) {
    IRONode *postNode;
    IROLinear *anchor;
    SInt32 copy;
    CInt64 offset;

    // The node is built by hand and takes the next free node index.
    postNode = oalloc(sizeof(IRONode));
    memset(postNode, 0, sizeof(IRONode));
    postNode->index = IRO_NumNodes++;

    anchor = IRO_NewLinear(IROLinearNop);
    anchor->index = IRO_NumLinear++;
    anchor->flags |= IROLF_1;
    postNode->first = anchor;
    postNode->last = anchor;

    // Splice after fnode3: flow-graph chain first, then the linear list.
    postNode->nextnode = fnode3->nextnode;
    fnode3->nextnode = postNode;
    anchor->next = fnode3->last->next;
    fnode3->last->next = anchor;

    // Every copy after the first gets its induction references adjusted.
    offset = cint64_zero;
    copy = 0;
    while (copy < unrollFactor) {
        IRO_IterateForLoopBody(fnode2, fnode1, loop, postNode->last, loop->induction->addConst, &offset, copy > 0);
        copy++;
    }

    UpdateInductionIncrement(loop, unrollFactor, postNode->last);
}
// Unrolls a loop that does not carry LoopFlags_10000 (see UnrollStandardLoop).
//
// header  - loop header node (LoopUnroll passes the same node it stores in LoopNode)
// fnode2  - the predecessor outside the loop; new code is pasted at fnode2->last
// fnode3  - first node of the loop body that gets copied
// loop    - loop description
// unrollFactor - requested factor; IRO_TestConstantIterationCount may change
//                it through its pointer argument when the trip count is constant
//
// Always returns 1.
static UInt32 UnrollForLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, IROLoop *loop, UInt32 unrollFactor) {
IROLinear *lastlabelnode;
IROLinear *earlyLoopExitTest;
IROLinear *origIterationCount;
IROLinear *saveHead1;
IROLinear *newFinalValue;
IROLinear *unrolledBodyEntryTest;
IROLinear *gotoNd;
IROLinear *saveHead2;
IROLinear *updIndInc;
IROLinear *labelNd;
IROLinear *saveTail2;
IROLinear *ndCopy;
IROLinear *saveTail3;
IROLinear *loopExitTest;
IROLinear *lastnd;
IROLinear *labelNd2;
IROLinear *saveTail4;
IROLinear *labelNd3;
IROLinear *scan;
IRONode *nd18;
IRONode *newfnode1;
IRONode *newfnode2;
IRONode *newfnode3;
IRONode *newfnode4;
IRONode *newfnode5;
IRONode *newfnode6;
CLabel *lastlabel;
CLabel *earlyLoopExitTestLabel;
CLabel *label;
CLabel *label2;
SInt32 i;
IROList list;
CInt64 iterCount;
int isConstant;
UInt32 needOrigLoop = 0;
UInt32 needUnrollBodyTest = 0;
UInt32 resetUnrolledFinalValue = 0;
SInt32 leftOver;
CInt64 val;
// --- Duplicate the header's condition (LoopNode->last->u.label.x4) at fnode2->last ---
lastlabelnode = IRO_FindLabelNode(fnode2->last->u.label.label, fnode2->last);
lastlabel = IRO_NewLabel();
IRO_InitList(&list);
IRO_DuplicateExprRange(lastlabelnode->next, LoopNode->last->u.label.x4, &list);
IRO_DuplicateExpr(LoopNode->last->u.label.x4, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
lastlabelnode = list.tail;
// --- Early-exit branch on that duplicated condition, targeting a fresh label ---
IRO_InitList(&list);
earlyLoopExitTest = BuildEarlyLoopExitTest(LoopNode->last->type, &list);
earlyLoopExitTestLabel = IRO_NewLabel();
earlyLoopExitTest->u.label.label = earlyLoopExitTestLabel;
earlyLoopExitTest->u.label.x4 = lastlabelnode;
earlyLoopExitTest->u.label.x4->flags |= IROLF_Reffed;
earlyLoopExitTest->rtype = LoopNode->last->rtype;
IRO_Paste(list.head, list.tail, fnode2->last);
// --- Defaults: keep the original loop and the entry test. For a constant
//     trip count IRO_TestConstantIterationCount may change these three
//     switches (and unrollFactor / leftOver) ---
isConstant = IsIterationCountConstant(loop, &iterCount);
needOrigLoop = 1;
needUnrollBodyTest = 1;
resetUnrolledFinalValue = 0;
if (isConstant)
IRO_TestConstantIterationCount(loop, &iterCount, 1, &unrollFactor, &leftOver, &needOrigLoop, &needUnrollBodyTest, &resetUnrolledFinalValue);
// --- Iteration count, new final value and the unrolled-body entry test ---
IRO_InitList(&list);
origIterationCount = BuildOrigIterationCount(&list, loop);
IRO_Paste(list.head, list.tail, fnode2->last);
saveHead1 = list.head;
IRO_InitList(&list);
newFinalValue = BuildNewFinalvalue(origIterationCount, unrollFactor, &list, loop);
IRO_Paste(list.head, list.tail, fnode2->last);
IRO_InitList(&list);
BuildUnrolledBodyEntryTest(&list, origIterationCount, unrollFactor, lastlabel);
IRO_Paste(list.head, list.tail, fnode2->last);
unrolledBodyEntryTest = list.tail;
// --- `goto label`, which jumps to the unrolled loop's exit test ---
label = IRO_NewLabel();
IRO_InitList(&list);
gotoNd = IRO_NewLinear(IROLinearOp1Arg);
gotoNd->index = ++IRO_NumLinear;
gotoNd->type = IROLinearGoto;
gotoNd->u.label.label = label;
IRO_AddToList(gotoNd, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
// --- `label2:` followed by unrollFactor copies of the body; every copy after
//     the first has its induction references adjusted (last argument i > 0) ---
IRO_InitList(&list);
label2 = BuildLabel(&list);
IRO_Paste(list.head, list.tail, fnode2->last);
saveHead2 = list.head;
val = cint64_zero;
for (i = 0; i < unrollFactor; i++)
IRO_IterateForLoopBody(fnode3, header, loop, fnode2->last, loop->induction->addConst, &val, i > 0);
updIndInc = UpdateInductionIncrement(loop, unrollFactor, fnode2->last);
// `label:` node.
// NOTE(review): label nodes take their index from IRO_NumLinear++ (post-
// increment) while the other new nodes here use ++IRO_NumLinear.
IRO_InitList(&list);
labelNd = IRO_NewLinear(IROLinearLabel);
labelNd->index = IRO_NumLinear++;
labelNd->u.label.label = label;
labelNd->flags |= IROLF_1;
IRO_AddToList(labelNd, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
// --- Exit test of the unrolled loop: a copy of the header's comparison with
//     its right operand replaced by either the original bound
//     (loop->nd18->u.diadic.right) or newFinalValue ---
IRO_InitList(&list);
IRO_DuplicateExpr(LoopNode->last->u.label.x4->u.diadic.left, &list);
saveTail2 = list.tail;
if (resetUnrolledFinalValue)
IRO_DuplicateExpr(loop->nd18->u.diadic.right, &list);
else
IRO_DuplicateExpr(newFinalValue, &list);
ndCopy = IRO_NewLinear(LoopNode->last->u.label.x4->type);
*ndCopy = *LoopNode->last->u.label.x4;
ndCopy->index = ++IRO_NumLinear;
ndCopy->next = NULL;
ndCopy->expr = NULL;
ndCopy->u.diadic.left = saveTail2;
ndCopy->u.diadic.right = list.tail;
IRO_AddToList(ndCopy, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
saveTail3 = list.tail;
// Conditional branch back to label2 on that comparison.
IRO_InitList(&list);
loopExitTest = BuildLoopExitTest(LoopNode->last->type, &list);
loopExitTest->u.label.label = label2;
loopExitTest->u.label.x4 = saveTail3;
loopExitTest->u.label.x4->flags |= IROLF_Reffed;
loopExitTest->rtype = LoopNode->last->rtype;
IRO_Paste(list.head, list.tail, fnode2->last);
saveTail4 = list.tail;
// `lastlabel:` node, the target of the unrolled-body entry test.
IRO_InitList(&list);
labelNd2 = IRO_NewLinear(IROLinearLabel);
labelNd2->index = IRO_NumLinear++;
labelNd2->u.label.label = lastlabel;
labelNd2->flags |= IROLF_1;
IRO_AddToList(labelNd2, &list);
IRO_Paste(list.head, list.tail, fnode2->last);
// --- Split the code pasted at fnode2 into flow-graph nodes, chained as
//     fnode2 -> newfnode1 .. newfnode5 -> (old fnode2->nextnode) ---
lastnd = fnode2->last;
nd18 = fnode2->nextnode;
// fnode2 now ends at the early-exit branch.
fnode2->last = earlyLoopExitTest;
// newfnode1: iteration-count code through the unrolled-body entry test.
newfnode1 = IRO_NewFlowGraphNode();
newfnode1->first = saveHead1;
newfnode1->last = unrolledBodyEntryTest;
fnode2->nextnode = newfnode1;
// newfnode2: the lone `goto label`.
newfnode2 = IRO_NewFlowGraphNode();
newfnode2->first = gotoNd;
newfnode2->last = gotoNd;
newfnode1->nextnode = newfnode2;
// newfnode3: `label2:` through the induction increment (the unrolled body).
newfnode3 = IRO_NewFlowGraphNode();
newfnode3->first = saveHead2;
newfnode3->last = updIndInc;
saveHead2->u.label.label->stmt = (Statement *) newfnode3;
// NOTE(review): newfnode2 has already been dereferenced above, so the else
// branch looks unreachable.
if (newfnode2)
newfnode2->nextnode = newfnode3;
else
newfnode1->nextnode = newfnode3;
// newfnode4: `label:` through the branch back to label2.
newfnode4 = IRO_NewFlowGraphNode();
newfnode4->first = labelNd;
newfnode4->last = saveTail4;
labelNd->u.label.label->stmt = (Statement *) newfnode4;
newfnode3->nextnode = newfnode4;
// newfnode5: `lastlabel:` through the node that used to be fnode2->last.
newfnode5 = IRO_NewFlowGraphNode();
newfnode5->first = labelNd2;
newfnode5->last = lastnd;
newfnode4->nextnode = newfnode5;
newfnode5->nextnode = nd18;
// --- newfnode6 (the early-exit label) is created by hand and spliced right
//     after LoopNode in both the linear list and the flow-graph chain ---
newfnode6 = oalloc(sizeof(IRONode));
memset(newfnode6, 0, sizeof(IRONode));
newfnode6->index = IRO_NumNodes;
IRO_NumNodes++;
labelNd3 = IRO_NewLinear(IROLinearLabel);
labelNd3->index = IRO_NumLinear++;
labelNd3->next = NULL;
labelNd3->u.label.label = earlyLoopExitTestLabel;
labelNd3->flags |= IROLF_1;
earlyLoopExitTestLabel->stmt = (Statement *) newfnode6;
newfnode6->first = labelNd3;
newfnode6->last = labelNd3;
labelNd3->next = LoopNode->last->next;
LoopNode->last->next = labelNd3;
newfnode6->nextnode = LoopNode->nextnode;
LoopNode->nextnode = newfnode6;
// --- Original loop not needed: nop out newfnode5, the header, the body entry
//     node and the induction's node, plus the unrolled-body entry test ---
if (!needOrigLoop) {
NoOpBlock(newfnode5);
NoOpBlock(header);
NoOpBlock(fnode3);
NoOpBlock(loop->induction->fnode);
IRO_NopOut(newfnode1->last->u.label.x4);
newfnode1->last->type = IROLinearNop;
}
// --- Entry test not needed: nop out the early-exit branch, the unrolled
//     loop's back-branch, the goto, and every non-Reffed node of newfnode1 ---
if (!needUnrollBodyTest) {
IRO_NopOut(earlyLoopExitTest->u.label.x4);
earlyLoopExitTest->type = IROLinearNop;
IRO_NopOut(newfnode4->last->u.label.x4);
newfnode4->last->type = IROLinearNop;
if (newfnode2)
newfnode2->last->type = IROLinearNop;
for (scan = newfnode1->first; scan; scan = scan->next) {
if (!(scan->flags & IROLF_Reffed))
IRO_NopOut(scan);
if (scan == newfnode1->last)
break;
}
}
return 1;
}
// Gathers the loop description for `header` and dispatches to the matching
// unroller.
//
// Sets ConditionalHeaderAtBottom to 1, stores fnode2 / fnode3 in the loop's
// xC / x10 fields and runs FindAssignmenttoInductionVar. Loops rejected by
// IsLoopUnrollable yield 0; otherwise the result of UnrollWhileLoop (when
// LoopFlags_10000 is set) or UnrollForLoop is returned.
static UInt32 UnrollStandardLoop(IRONode *header, IRONode *fnode2, IRONode *fnode3, int count) {
    IROLoop *info;

    ConditionalHeaderAtBottom = 1;

    info = ExtractLoopInfo(header);
    info->xC = fnode2;
    info->x10 = fnode3;
    FindAssignmenttoInductionVar(info, fnode2);

    if (IsLoopUnrollable(info)) {
        if (info->flags & LoopFlags_10000)
            return UnrollWhileLoop(header, fnode2, fnode3, info, count);
        return UnrollForLoop(header, fnode2, fnode3, info, count);
    }

    IRO_Dump("LoopUnroll:loop with header %d not unrolled because IsLoopUnrollable failed\n", header->index);
    return 0;
}
// Tries to unroll the loop headed by `header`; its node set is expected in
// the global InLoop.
//
// After running the loop analyses, the loop is handed to UnrollStandardLoop
// only if all of these hold:
//   - the header has at least one predecessor outside the loop;
//   - the header ends in IROLinearIf or IROLinearIfNot;
//   - the node following the header exists and is outside the loop;
//   - exactly one predecessor is outside the loop, it ends in a goto, and the
//     node following it is the header's in-loop successor, which is not the
//     header itself.
// On success IRO_NodeTable is rebuilt and successor/predecessor and dominator
// information is recomputed.
static void LoopUnroll(int count, IRONode *header) {
    VarRecord *var;
    IRONode *node;
    UInt16 p;
    UInt16 s;
    IRONode *fallInPred;
    IRONode *outerPred;
    IRONode *bodyEntry;
    int hasOuterPred;
    UInt32 outerPredCount;

    LoopNode = header;
    FindMustReach();
    for (var = IRO_FirstVar; var; var = var->next)
        var->xA = 1;
    ComputeLoopKills();
    ComputeLoopInvariance();
    ComputeLoopInduction();

    LoopNode = header;
    ConditionalHeaderAtBottom = 0;

    // At most one predecessor outside the loop may fall through into the header.
    fallInPred = NULL;
    hasOuterPred = 0;
    for (p = 0; p < LoopNode->numpred; p++) {
        node = IRO_NodeTable[LoopNode->pred[p]];
        if (Bv_IsBitSet(node->index, InLoop))
            continue;
        hasOuterPred = 1;
        if (node->nextnode == header) {
            CError_ASSERT(2101, !fallInPred || node == fallInPred);
            fallInPred = node;
        }
    }
    if (!hasOuterPred) {
        IRO_Dump("No predecessor outside the loop\n");
        return;
    }

    if (LoopNode->last->type != IROLinearIf && LoopNode->last->type != IROLinearIfNot) {
        IRO_Dump(" LoopUnroll:Loop with header = %d is not a conditional loop\n", header->index);
        return;
    }

    if (!LoopNode->nextnode || (Bv_IsBitSet(LoopNode->nextnode->index, InLoop)))
        return;

    // The header may have only one successor inside the loop.
    bodyEntry = NULL;
    for (s = 0; s < LoopNode->numsucc; s++) {
        node = IRO_NodeTable[LoopNode->succ[s]];
        if (Bv_IsBitSet(node->index, InLoop)) {
            CError_ASSERT(2159, !bodyEntry);
            bodyEntry = node;
        }
    }

    // Count the predecessors outside the loop, remembering the last one seen.
    outerPred = NULL;
    outerPredCount = 0;
    for (p = 0; p < LoopNode->numpred; p++) {
        node = IRO_NodeTable[LoopNode->pred[p]];
        if (!Bv_IsBitSet(node->index, InLoop)) {
            outerPred = node;
            outerPredCount++;
        }
    }

    if (
        outerPredCount != 1 ||
        outerPred->last->type != IROLinearGoto ||
        outerPred->nextnode != bodyEntry ||
        bodyEntry == LoopNode
    )
        return;

    if (!UnrollStandardLoop(header, outerPred, bodyEntry, count))
        return;

    // Unrolling added flow-graph nodes: rebuild the index table and the
    // derived graph information.
    IRO_NodeTable = oalloc(sizeof(IRONode *) * IRO_NumNodes);
    memset(IRO_NodeTable, 0, sizeof(IRONode *) * IRO_NumNodes);
    for (node = IRO_FirstNode; node; node = node->nextnode)
        IRO_NodeTable[node->index] = node;
    IRO_ComputeSuccPred();
    IRO_ComputeDom();

    IRO_Dump(" LoopUnroll:Loop with header = %d Unrolled\n", header->index);
}
/*
 * Checks used by IsLoopUnrollable when LoopFlags_10000 is clear:
 * the bound, the pre-header initialisation and the step of the loop.
 * Returns 1 when every check passes, 0 (after dumping the reason) otherwise.
 */
static int IsLoopUnrollable_BoundedForm(IROLoop *loop) {
    CInt64 iterCount;
    IROLinear *limit;
    IROLinear *init;

    limit = loop->nd18->u.diadic.right;
    if (!IRO_IsIntConstant(limit) && !(limit->flags & IROLF_LoopInvariant)) {
        IRO_Dump("IsLoopUnrollable:No because Loop Upper Bound is Variant in the loop\n");
        return 0;
    }

    init = loop->nd14;
    if (!init) {
        IRO_Dump("IsLoopUnrollable:No because there is no initialization of loop index in PreHeader\n");
        return 0;
    }
    if (!IRO_IsVariable(init->u.diadic.left)) {
        IRO_Dump("IsLoopUnrollable:No because initial value of induction stored thru pointer\n");
        return 0;
    }

    /* A signed induction needs a non-negative constant start, or bounds
       whose difference is a known constant. */
    if (!IRO_IsUnsignedType(init->rtype)) {
        if (IRO_IsIntConstant(init->u.diadic.right)) {
            if (!CInt64_GreaterEqual(init->u.diadic.right->u.node->data.intval, cint64_zero)) {
                IRO_Dump("IsLoopUnrollable:No because initial value of induction is signed but init < 0\n");
                return 0;
            }
        } else if (IsIterationCountConstant(loop, &iterCount)) {
            IRO_Dump("IsLoopUnrollable:Yes, the limits substract out to be constants\n");
        } else {
            IRO_Dump("IsLoopUnrollable:No because initial value of induction is signed and not constant\n");
            return 0;
        }
    }

    if (!(loop->flags & LP_LOOP_STEP_ISADD)) {
        IRO_Dump("IsLoopUnrollable:No because LP_LOOP_STEP_ISADD is not set i.e induciton is not updated by 1\n");
        return 0;
    }
    return 1;
}

/*
 * Checks used by IsLoopUnrollable when LoopFlags_10000 is set (the dump
 * messages call this the "while loop" form): the compared operand must be
 * unsigned and LoopFlags_2000 must be set.
 */
static int IsLoopUnrollable_WhileForm(IROLoop *loop) {
    if (!IRO_IsUnsignedType(loop->nd18->u.diadic.left->rtype)) {
        IRO_Dump("IsLoopUnrollable:No because the while loop induction is signed\n");
        return 0;
    }
    if (!(loop->flags & LoopFlags_2000)) {
        IRO_Dump("IsLoopUnrollable:No because the while loop operator is not of decrement form\n");
        return 0;
    }
    return 1;
}

/*
 * Decide whether `loop` may be unrolled.
 * Returns 1 if so; otherwise dumps the reason and returns 0.
 */
static int IsLoopUnrollable(IROLoop *loop) {
    /* Flags that disqualify the loop outright. */
    if (loop->flags & LP_LOOP_HAS_ASM) {
        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_ASM \n");
        return 0;
    }
    if (loop->flags & LP_IFEXPR_NON_CANONICAL) {
        IRO_Dump("IsLoopUnrollable:No due to LP_IFEXPR_NON_CANONICAL \n");
        return 0;
    }
    if (loop->flags & LP_LOOP_HAS_CALLS) {
        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_CALLS \n");
        return 0;
    }
    if (loop->flags & LP_LOOP_HAS_CNTRLFLOW) {
        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HAS_CNTRLFLOW \n");
        return 0;
    }
    if (loop->flags & LP_INDUCTION_NOT_FOUND) {
        IRO_Dump("IsLoopUnrollable:No due to LP_INDUCTION_NOT_FOUND \n");
        return 0;
    }
    if (loop->flags & LP_LOOP_HDR_HAS_SIDEEFFECTS) {
        IRO_Dump("IsLoopUnrollable:No due to LP_LOOP_HDR_HAS_SIDEEFFECTS \n");
        return 0;
    }

    /* A flag that must be present. */
    if (!(loop->flags & LoopFlags_200)) {
        IRO_Dump("IsLoopUnrollable:No because header does not follow induction update \n");
        return 0;
    }

    /* Form-specific checks. */
    if (loop->flags & LoopFlags_10000) {
        if (!IsLoopUnrollable_WhileForm(loop))
            return 0;
    } else {
        if (!IsLoopUnrollable_BoundedForm(loop))
            return 0;
    }

    /* Finally, the loop body must not exceed the configured size limit. */
    if (loop->sizeBySomeMeasurement > copts.unrollinstrfactor) {
        IRO_Dump("IsLoopUnrollable:No because loop size greater than threshold\n");
        return 0;
    }
    return 1;
}
/*
 * Append a new one-operand branch node to `list` whose sense is the
 * opposite of `type`: IROLinearIf yields IROLinearIfNot, anything else
 * yields IROLinearIf. The operand and label are left for the caller.
 */
IROLinear *BuildEarlyLoopExitTest(IROLinearType type, IROList *list) {
    IROLinear *test = IRO_NewLinear(IROLinearOp1Arg);

    test->index = ++IRO_NumLinear;
    test->type = (type == IROLinearIf) ? IROLinearIfNot : IROLinearIf;
    IRO_AddToList(test, list);
    return test;
}
/*
 * Append a new one-operand node of the given branch `type` to `list`
 * and return it. The operand and label are left for the caller.
 */
IROLinear *BuildLoopExitTest(IROLinearType type, IROList *list) {
    IROLinear *test;

    test = IRO_NewLinear(IROLinearOp1Arg);
    test->index = ++IRO_NumLinear;
    test->type = type;
    IRO_AddToList(test, list);

    return test;
}
int IsIterationCountConstant(IROLoop *loop, CInt64 *pval) {
IROLinear *lowerBound;
IROLinear *upperBound;
Type *type;
int isUnsigned;
IROAddrRecord *lowerRec;
IROAddrRecord *upperRec;
CInt64 lowerval;
CInt64 upperval;
CInt64 incval;
CInt64 negOne;
lowerBound = loop->nd14->u.diadic.right;
if (loop->flags & LoopFlags_1) {
upperBound = loop->nd18->u.diadic.right;
type = loop->nd18->u.diadic.right->rtype;
} else {
upperBound = loop->nd18->u.diadic.left;
type = loop->nd18->u.diadic.left->rtype;
}
isUnsigned = IRO_IsUnsignedType(type);
if (IRO_IsIntConstant(lowerBound) && IRO_IsIntConstant(upperBound)) {
lowerval = lowerBound->u.node->data.intval;
upperval = upperBound->u.node->data.intval;
if (isUnsigned) {
if (CInt64_LessEqualU(upperval, lowerval))
return 0;
} else {
if (CInt64_LessEqual(upperval, lowerval))
return 0;
}
CInt64_SetLong(&incval, loop->induction->addConst);
CInt64_SetLong(&negOne, -1);
*pval = CInt64_Sub(upperval, lowerval);
*pval = CInt64_Add(*pval, incval);
if (IS_LINEAR_DIADIC(loop->nd18, ELESS))
*pval = CInt64_Add(*pval, negOne);
CError_ASSERT(2486, !CInt64_IsZero(&incval));
if (isUnsigned)
*pval = CInt64_DivU(*pval, incval);
else
*pval = CInt64_Div(*pval, incval);
if (CInt64_Equal(*pval, cint64_zero))
return 0;
if (isUnsigned) {
CError_ASSERT(2508, !CInt64_LessEqualU(*pval, cint64_zero));
} else {
CError_ASSERT(2517, !CInt64_LessEqual(*pval, cint64_zero));
}
return 1;
}
lowerRec = IRO_InitAddrRecordPointer(lowerBound);
upperRec = IRO_InitAddrRecordPointer(upperBound);
if (IS_LINEAR_DIADIC(lowerBound, EADD)) {
IRO_DecomposeAddressExpression(lowerBound, lowerRec);
} else if (IRO_IsIntConstant(lowerBound)) {
lowerRec->numInts++;
IRO_AddElmToList(lowerBound, &lowerRec->ints);
lowerRec->numObjRefs = 0;
lowerRec->numMisc = 0;
} else {
lowerRec->numMisc++;
IRO_AddElmToList(lowerBound, &lowerRec->misc);
lowerRec->numObjRefs = 0;
lowerRec->numInts = 0;
}
if (IS_LINEAR_DIADIC(upperBound, EADD)) {
IRO_DecomposeAddressExpression(upperBound, upperRec);
} else if (IRO_IsIntConstant(upperBound)) {
upperRec->numInts++;
IRO_AddElmToList(upperBound, &upperRec->ints);
upperRec->numObjRefs = 0;
upperRec->numMisc = 0;
} else {
upperRec->numMisc++;
IRO_AddElmToList(upperBound, &upperRec->misc);
upperRec->numObjRefs = 0;
upperRec->numInts = 0;
}
if (IsDifferenceOfTermsConstant(lowerRec, upperRec, isUnsigned, pval)) {
if (IS_LINEAR_DIADIC(loop->nd18, ELESSEQU))
*pval = CInt64_Add(*pval, cint64_one);
return 1;
}
return 0;
}
/*
 * Decide whether (upper - lower) is a strictly positive compile-time
 * constant, given both bounds already split into term records.
 *
 * lowerRec, upperRec - decomposed lower / upper bound
 * isUnsigned         - currently unused; the final comparison is signed
 * pval               - receives (sum of upper ints - sum of lower ints)
 *                      when the function returns 1
 *
 * Returns 1 only when neither record has object references, the misc
 * (non-constant) terms of both records match pairwise in list order, and
 * the integer part of the upper bound is greater than that of the lower.
 */
static int IsDifferenceOfTermsConstant(IROAddrRecord *lowerRec, IROAddrRecord *upperRec, int isUnsigned, CInt64 *pval) {
    UInt32 i;
    CInt64 upperval;
    CInt64 lowerval;
    IROElmList *el;
    IROElmList *lowerEl;
    IROElmList *upperEl;
    IROLinear *nd;

    /* Any object reference on either side makes the difference unknown
       (equivalent to the original pair of numObjRefs checks). */
    if (upperRec->numObjRefs != 0 || lowerRec->numObjRefs != 0)
        return 0;

    /* The non-constant terms must cancel. The previous code compared only
       the first element of each list numMisc times, so a mismatch in any
       later term went unnoticed; walk both lists in step instead. */
    if (upperRec->numMisc != lowerRec->numMisc)
        return 0;
    lowerEl = lowerRec->misc;
    upperEl = upperRec->misc;
    for (i = 0; i < upperRec->numMisc; i++) {
        /* A list shorter than its recorded count cannot be verified. */
        if (!lowerEl || !upperEl)
            return 0;
        if (!IRO_ExprsSame(lowerEl->element, upperEl->element))
            return 0;
        lowerEl = lowerEl->next;
        upperEl = upperEl->next;
    }

    /* Sum the integer constant terms of each bound. */
    upperval = cint64_zero;
    for (el = upperRec->ints; el; el = el->next) {
        nd = el->element;
        upperval = CMach_CalcIntDiadic(nd->rtype, upperval, '+', nd->u.node->data.intval);
    }
    lowerval = cint64_zero;
    for (el = lowerRec->ints; el; el = el->next) {
        nd = el->element;
        lowerval = CMach_CalcIntDiadic(nd->rtype, lowerval, '+', nd->u.node->data.intval);
    }

    /* Only a strictly positive difference counts. */
    if (CInt64_Equal(upperval, lowerval))
        return 0;
    if (!CInt64_Greater(upperval, lowerval))
        return 0;

    *pval = CInt64_Sub(upperval, lowerval);
    return 1;
}
/*
 * Turn every linear node of `fnode`, from its first node through its last
 * node inclusive, into an IROLinearNop.
 */
void NoOpBlock(IRONode *fnode) {
    IROLinear *cur = fnode->first;
    IROLinear *stop = fnode->last;

    while (cur) {
        cur->type = IROLinearNop;
        if (cur == stop)
            break;
        cur = cur->next;
    }
}
/*
 * Given a compile-time iteration count, choose the unroll parameters.
 *
 * loop                     - only the type of the compared operand of
 *                            loop->nd18 is read (side chosen by LoopFlags_1)
 * iterCount                - constant iteration count
 * vectorStride             - iterations consumed per body copy; asserted non-zero
 * unrollFactor             - in: requested factor (asserted non-zero);
 *                            out: chosen factor, 0 if iterCount < vectorStride
 * leftOver                 - out: iterations not covered by the unrolled body
 * needOrigLoop             - out: non-zero when leftover iterations exist
 * needUnrollBodyTest       - out: see the assignments below
 * resetUnrolledFinalValue  - out: !(needOrigLoop && needUnrollBodyTest)
 *
 * NOTE(review): when iterCount < vectorStride, *resetUnrolledFinalValue is
 * not written here, yet it is still passed to IRO_Dump at the end; the
 * caller's initial value is what gets printed in that case.
 */
void IRO_TestConstantIterationCount(IROLoop *loop, CInt64 *iterCount, SInt32 vectorStride, UInt32 *unrollFactor, SInt32 *leftOver, UInt32 *needOrigLoop, UInt32 *needUnrollBodyTest, UInt32 *resetUnrolledFinalValue) {
UInt32 isUnsigned;
CInt64 val;
CInt64 val3;
CInt64 mod;
CInt64 val2;
CInt64 loopvar3;
CInt64 loopvar1;
CInt64 loopvar2;
CInt64 strideVal;
CError_ASSERT(2737, *unrollFactor);
isUnsigned = IRO_IsUnsignedType(
(loop->flags & LoopFlags_1) ? loop->nd18->u.diadic.right->rtype :loop->nd18->u.diadic.left->rtype);
CError_ASSERT(2756, vectorStride);
strideVal = IRO_MakeLong(vectorStride);
// Fewer iterations than one stride: nothing to unroll, keep the original loop only.
if (isUnsigned ? CInt64_LessU(*iterCount, strideVal) : CInt64_Less(*iterCount, strideVal)) {
*needOrigLoop = 1;
*needUnrollBodyTest = 0;
*unrollFactor = 0;
*leftOver = CInt64_GetULong(iterCount);
} else {
// val = iterCount / vectorStride (a shift for strides 2, 4, 8 and 16).
switch (vectorStride) {
case 1:
val = *iterCount;
break;
case 2:
val = CInt64_ShrU(*iterCount, cint64_one);
break;
case 4:
val = CInt64_ShrU(*iterCount, IRO_MakeLong(2));
break;
case 8:
val = CInt64_ShrU(*iterCount, IRO_MakeLong(3));
break;
case 16:
val = CInt64_ShrU(*iterCount, IRO_MakeLong(4));
break;
default:
val = CInt64_Div(*iterCount, strideVal);
}
// Never unroll by more than the number of whole strides.
if (CInt64_LessU(val, IRO_MakeLong(*unrollFactor)))
*unrollFactor = CInt64_GetULong(&val);
CInt64_SetLong(&val2, *unrollFactor);
// val3 = iterCount % vectorStride (a mask for strides 2, 4, 8 and 16).
switch (vectorStride) {
case 1:
val3 = cint64_zero;
break;
case 2:
val3 = CInt64_And(*iterCount, cint64_one);
break;
case 4:
val3 = CInt64_And(*iterCount, IRO_MakeLong(3));
break;
case 8:
val3 = CInt64_And(*iterCount, IRO_MakeLong(7));
break;
case 16:
val3 = CInt64_And(*iterCount, IRO_MakeLong(15));
break;
default:
val3 = CInt64_Mod(*iterCount, strideVal);
}
// At most 8 strides: the factor becomes the full stride count.
if (CInt64_LessEqualU(val, IRO_MakeLong(8))) {
*needUnrollBodyTest = vectorStride > 1;
*unrollFactor = CInt64_GetULong(&val);
*leftOver = CInt64_GetULong(&val3);
*needOrigLoop = *leftOver != 0;
*resetUnrolledFinalValue = !(*needOrigLoop && *needUnrollBodyTest);
} else {
// Search downward from the capped factor for the candidate that leaves
// the fewest leftover iterations ((val % factor) * stride + val3).
// loopvar2 tracks the best leftover so far, loopvar1 the factor giving it.
loopvar1 = IRO_MakeLong(0x7FFFFFFF);
loopvar2 = IRO_MakeLong(0x7FFFFFFF);
do {
mod = CInt64_Mod(val, val2);
loopvar3 = CInt64_Add(CInt64_Mul(mod, strideVal), val3);
// Strict '<' keeps the earliest (largest) factor on ties.
if (CInt64_Less(loopvar3, loopvar2)) {
loopvar2 = loopvar3;
loopvar1 = val2;
}
// With a stride above 1 only the first candidate is considered.
if (vectorStride > 1)
break;
val2 = CInt64_Add(val2, cint64_negone);
// Stop once the candidate's square drops below val.
} while (CInt64_GreaterEqualU(CInt64_Mul(val2, val2), val));
*unrollFactor = CInt64_GetULong(&loopvar1);
*leftOver = CInt64_GetULong(&loopvar2);
*needOrigLoop = *leftOver != 0;
*needUnrollBodyTest = CInt64_Less(loopvar1, val) || vectorStride > 1;
*resetUnrolledFinalValue = !(*needOrigLoop && *needUnrollBodyTest);
}
}
IRO_Dump(
"---- IterCount = %d, VectorStride = %d, UnrollFactor = %d, LeftOver = %d,\n"
"\tNeedOrigLoop = %d, NeedUnrollBodyTest = %d, ResetUnrolledFinalValue = %d\n",
CInt64_GetULong(iterCount), vectorStride, *unrollFactor, *leftOver,
*needOrigLoop, *needUnrollBodyTest, *resetUnrolledFinalValue
);
}
/*
 * Emit, at the end of `list`, IR that computes the loop's original
 * iteration count at run time and stores it in a fresh temporary:
 *
 *     temp = (upper - lower + step [- 1 when the test is '<']) / step
 *
 * lower is the right-hand side of loop->nd14, upper is the operand of
 * loop->nd18 selected by LoopFlags_1, and step is loop->induction->addConst
 * (asserted non-zero). The subtraction is skipped when lower is the
 * constant 0, and the division is skipped when step is 1.
 *
 * Returns the left-hand (temporary reference) node of the assignment.
 */
IROLinear *BuildOrigIterationCount(IROList *list, IROLoop *loop) {
    IROLinear *upperBound;
    IROLinear *nd29b;
    IROLinear *lowerBound;
    IROLinear *finalCount;
    IROLinear *divisor;
    Type *type;
    IROLinear *nd25;
    IROLinear *tmp;
    Boolean isZeroBase;
    Object *tempobj;
    IROLinear *iterCount;
    IROLinear *negone;
    IROLinear *ass;
    ENode *expr;
    SInt32 powval;

    /* A constant-zero lower bound needs no subtraction. */
    isZeroBase = 0;
    lowerBound = loop->nd14->u.diadic.right;
    if (IRO_IsIntConstant(lowerBound) && CInt64_Equal(lowerBound->u.node->data.intval, cint64_zero))
        isZeroBase = 1;
    if (!isZeroBase)
        lowerBound = IRO_DuplicateExpr(lowerBound, list);

    if (loop->flags & LoopFlags_1) {
        upperBound = IRO_DuplicateExpr(loop->nd18->u.diadic.right, list);
        type = loop->nd18->u.diadic.right->rtype;
    } else {
        upperBound = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list);
        type = loop->nd18->u.diadic.left->rtype;
    }

    CError_ASSERT(2924, loop->induction);
    CError_ASSERT(2929, loop->induction->addConst);

    /* Constant node holding the step. It is not added to the list yet:
       a duplicate is used for the addition below and this node is added
       only if it ends up as the divisor / shift amount. */
    divisor = IRO_NewLinear(IROLinearOperand);
    divisor->index = ++IRO_NumLinear;
    divisor->rtype = type;
    expr = IRO_NewENode(EINTCONST);
    expr->rtype = type;
    CInt64_SetLong(&expr->data.intval, loop->induction->addConst);
    divisor->u.node = expr;

    /* upper - lower */
    if (isZeroBase) {
        iterCount = upperBound;
    } else {
        iterCount = IRO_NewLinear(IROLinearOp2Arg);
        iterCount->index = ++IRO_NumLinear;
        iterCount->nodetype = ESUB;
        iterCount->u.diadic.left = upperBound;
        iterCount->u.diadic.right = lowerBound;
        iterCount->rtype = type;
        IRO_AddToList(iterCount, list);
    }

    /* ... + step */
    nd25 = IRO_DuplicateExpr(divisor, list);
    nd29b = IRO_NewLinear(IROLinearOp2Arg);
    nd29b->index = ++IRO_NumLinear;
    nd29b->nodetype = EADD;
    nd29b->u.diadic.left = iterCount;
    nd29b->u.diadic.right = nd25;
    nd29b->rtype = type;
    IRO_AddToList(nd29b, list);

    /* ... - 1 for a strict '<' comparison */
    if (loop->nd18->type == IROLinearOp2Arg && loop->nd18->nodetype == ELESS) {
        tmp = nd29b;
        negone = IRO_NewLinear(IROLinearOperand);
        negone->index = ++IRO_NumLinear;
        negone->rtype = type;
        expr = IRO_NewENode(EINTCONST);
        expr->rtype = type;
        CInt64_SetLong(&expr->data.intval, -1);
        negone->u.node = expr;
        IRO_AddToList(negone, list);
        nd29b = IRO_NewLinear(IROLinearOp2Arg);
        nd29b->index = ++IRO_NumLinear;
        nd29b->nodetype = EADD;
        nd29b->u.diadic.left = tmp;
        nd29b->u.diadic.right = negone;
        nd29b->rtype = type;
        IRO_AddToList(nd29b, list);
    }

    /* ... / step */
    if (CInt64_Equal(divisor->u.node->data.intval, cint64_one)) {
        finalCount = nd29b;
    } else {
        if (divisor->rtype->size <= 4 && IS_TYPE_INT(divisor->rtype) && IRO_IsPow2(divisor, &powval)) {
            /* Power-of-two step: divide with a right shift by log2(step).
               This used to emit ESHL, which multiplies by the step instead
               of dividing and disagrees with the EDIV path below. */
            finalCount = IRO_NewLinear(IROLinearOp2Arg);
            finalCount->index = ++IRO_NumLinear;
            finalCount->nodetype = ESHR;
            finalCount->u.diadic.left = nd29b;
            finalCount->u.diadic.right = divisor;
            CInt64_SetLong(&divisor->u.node->data.intval, powval);
            finalCount->rtype = type;
            IRO_AddToList(divisor, list);
            IRO_AddToList(finalCount, list);
        } else {
            finalCount = IRO_NewLinear(IROLinearOp2Arg);
            finalCount->index = ++IRO_NumLinear;
            finalCount->nodetype = EDIV;
            finalCount->u.diadic.left = nd29b;
            finalCount->u.diadic.right = divisor;
            finalCount->rtype = type;
            IRO_AddToList(divisor, list);
            IRO_AddToList(finalCount, list);
        }
    }

    /* temp = finalCount */
    tempobj = create_temp_object(type);
    IRO_FindVar(tempobj, 1, 1);
    ass = IRO_NewLinear(IROLinearOp2Arg);
    ass->index = ++IRO_NumLinear;
    ass->nodetype = EASS;
    ass->u.diadic.left = IRO_TempReference(tempobj, list);
    ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
    ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
    ass->u.diadic.right = finalCount;
    ass->u.diadic.right->flags |= IROLF_Reffed;
    ass->rtype = type;
    IRO_AddToList(ass, list);
    return ass->u.diadic.left;
}
/*
 * Emit, at the end of `list`, IR that stores
 *     temp = <left operand of loop->nd18> + 1
 * in a fresh temporary of that operand's type.
 * Returns the left-hand (temporary reference) node of the assignment.
 */
static IROLinear *BuildOrigIterationCount_DoWhile(IROList *list, IROLoop *loop) {
IROLinear *finalCount;
IROLinear *count;
IROLinear *ass;
Type *type;
Object *tempobj;
ENode *expr;
type = loop->nd18->u.diadic.left->rtype;
// Constant 1.
count = IRO_NewLinear(IROLinearOperand);
count->index = ++IRO_NumLinear;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
expr->data.intval = cint64_one;
count->u.node = expr;
count->rtype = type;
IRO_AddToList(count, list);
count->flags |= IROLF_Reffed;
// Duplicate of the compared operand, marked as read rather than assigned, plus 1.
finalCount = IRO_NewLinear(IROLinearOp2Arg);
finalCount->index = ++IRO_NumLinear;
finalCount->nodetype = EADD;
finalCount->rtype = type;
finalCount->u.diadic.left = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list);
finalCount->u.diadic.left->flags |= IROLF_Reffed;
finalCount->u.diadic.left->flags &= ~IROLF_Assigned;
finalCount->u.diadic.left->u.monadic->flags &= ~IROLF_Assigned;
finalCount->u.diadic.right = count;
IRO_AddToList(finalCount, list);
// temp = finalCount
tempobj = create_temp_object(type);
IRO_FindVar(tempobj, 1, 1);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EASS;
ass->u.diadic.left = IRO_TempReference(tempobj, list);
ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.right = finalCount;
ass->rtype = type;
IRO_AddToList(ass, list);
return ass->u.diadic.left;
}
/*
 * Emit, at the end of `list`, IR that stores
 *     temp = bound - (loop->induction->addConst * unrollFactor)
 * in a fresh temporary, where bound is the operand of loop->nd18 selected
 * by LoopFlags_1. `iterCount` is used only for its result type.
 * Returns the left-hand (temporary reference) node of the assignment.
 */
IROLinear *BuildNewFinalvalue(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) {
IROLinear *sub;
IROLinear *addvalue;
Type *type;
IROLinear *ass;
IROLinear *dupbound;
Object *tempobj;
ENode *expr;
type = iterCount->rtype;
// Constant: step * unrollFactor.
addvalue = IRO_NewLinear(IROLinearOperand);
addvalue->index = ++IRO_NumLinear;
addvalue->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, loop->induction->addConst * unrollFactor);
addvalue->u.node = expr;
IRO_AddToList(addvalue, list);
// Copy of the loop bound.
if (loop->flags & LoopFlags_1)
dupbound = IRO_DuplicateExpr(loop->nd18->u.diadic.right, list);
else
dupbound = IRO_DuplicateExpr(loop->nd18->u.diadic.left, list);
// bound - constant
sub = IRO_NewLinear(IROLinearOp2Arg);
sub->index = ++IRO_NumLinear;
sub->nodetype = ESUB;
sub->u.diadic.left = dupbound;
sub->u.diadic.right = addvalue;
sub->rtype = type;
IRO_AddToList(sub, list);
// temp = sub
tempobj = create_temp_object(type);
IRO_FindVar(tempobj, 1, 1);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EASS;
ass->u.diadic.left = IRO_TempReference(tempobj, list);
ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.right = sub;
ass->u.diadic.right->flags |= IROLF_Reffed;
ass->rtype = type;
IRO_AddToList(ass, list);
return ass->u.diadic.left;
}
/*
 * Emit, at the end of `list`, IR that stores
 *     temp = (ind / unrollFactor + 1) * unrollFactor
 * in a fresh temporary, where ind is a duplicate of the operand of the
 * induction update node ind->nd. All nodes use ind->nd's result type.
 * Returns the left-hand (temporary reference) node of the assignment.
 */
static IROLinear *BuildPreAlignTemp(IROLoopInd *ind, UInt32 unrollFactor, IROList *list) {
Type *type;
IROLinear *indnd;
IROLinear *factornd;
IROLinear *div;
IROLinear *constnd;
IROLinear *add;
IROLinear *mul;
IROLinear *ass;
Object *tempobj;
ENode *expr;
indnd = ind->nd;
type = indnd->rtype;
// Constant: unrollFactor.
factornd = IRO_NewLinear(IROLinearOperand);
factornd->index = ++IRO_NumLinear;
factornd->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, unrollFactor);
factornd->u.node = expr;
IRO_AddToList(factornd, list);
// Duplicate the updated operand; the copy is picked up through list->tail
// and marked as not assigned.
if (indnd->type == IROLinearOp1Arg)
IRO_DuplicateExpr(indnd->u.monadic, list);
else
IRO_DuplicateExpr(indnd->u.diadic.left, list);
list->tail->flags &= ~IROLF_Assigned;
list->tail->u.monadic->flags &= ~IROLF_Assigned;
// ind / unrollFactor
div = IRO_NewLinear(IROLinearOp2Arg);
div->index = ++IRO_NumLinear;
div->nodetype = EDIV;
div->u.diadic.left = list->tail;
div->u.diadic.right = factornd;
div->rtype = type;
IRO_AddToList(div, list);
div->flags |= IROLF_Reffed;
// Constant 1.
constnd = IRO_NewLinear(IROLinearOperand);
constnd->index = ++IRO_NumLinear;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
expr->data.intval = cint64_one;
constnd->u.node = expr;
constnd->rtype = type;
IRO_AddToList(constnd, list);
constnd->flags |= IROLF_Reffed;
// (ind / unrollFactor) + 1
add = IRO_NewLinear(IROLinearOp2Arg);
add->index = ++IRO_NumLinear;
add->nodetype = EADD;
add->u.diadic.left = div;
add->u.diadic.right = constnd;
add->rtype = type;
IRO_AddToList(add, list);
add->flags |= IROLF_Reffed;
// ... * unrollFactor (a second copy of the constant, taken from list->tail)
IRO_DuplicateExpr(factornd, list);
mul = IRO_NewLinear(IROLinearOp2Arg);
mul->index = ++IRO_NumLinear;
mul->nodetype = EMUL;
mul->u.diadic.left = add;
mul->u.diadic.right = list->tail;
mul->rtype = type;
IRO_AddToList(mul, list);
mul->flags |= IROLF_Reffed;
// temp = mul
tempobj = create_temp_object(type);
IRO_FindVar(tempobj, 1, 1);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EASS;
ass->u.diadic.left = IRO_TempReference(tempobj, list);
ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.right = mul;
ass->u.diadic.right->flags |= IROLF_Reffed;
ass->rtype = type;
IRO_AddToList(ass, list);
return ass->u.diadic.left;
}
/*
 * Emit, at the end of `list`, IR that stores
 *     temp = iterCount * loop->induction->addConst + ind
 * in a fresh temporary, where ind is a duplicate of the operand of the
 * induction update node. `unrollFactor` is not used by this function.
 * Returns the left-hand (temporary reference) node of the assignment.
 */
static IROLinear *BuildNewFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) {
IROLinear *addvalue;
IROLinear *add;
IROLinear *mul;
IROLinear *ass;
Type *type;
Object *tempobj;
ENode *expr;
type = iterCount->rtype;
// Constant: step.
addvalue = IRO_NewLinear(IROLinearOperand);
addvalue->index = ++IRO_NumLinear;
addvalue->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, loop->induction->addConst);
addvalue->u.node = expr;
IRO_AddToList(addvalue, list);
addvalue->flags |= IROLF_Reffed;
// iterCount * step; the duplicated iterCount reference is marked as not assigned.
mul = IRO_NewLinear(IROLinearOp2Arg);
mul->index = ++IRO_NumLinear;
mul->nodetype = EMUL;
mul->u.diadic.left = IRO_DuplicateExpr(iterCount, list);
mul->u.diadic.right = addvalue;
mul->rtype = type;
IRO_AddToList(mul, list);
mul->flags |= IROLF_Reffed;
mul->u.diadic.left->flags &= ~IROLF_Assigned;
mul->u.diadic.left->u.diadic.left->flags &= ~IROLF_Assigned;
// Duplicate the induction operand; the copy is picked up through list->tail.
if (loop->induction->nd->type == IROLinearOp1Arg)
IRO_DuplicateExpr(loop->induction->nd->u.monadic, list);
else
IRO_DuplicateExpr(loop->induction->nd->u.diadic.left, list);
list->tail->flags &= ~IROLF_Assigned;
list->tail->u.diadic.left->flags &= ~IROLF_Assigned;
// (iterCount * step) + ind
add = IRO_NewLinear(IROLinearOp2Arg);
add->index = ++IRO_NumLinear;
add->nodetype = EADD;
add->u.diadic.left = mul;
add->u.diadic.right = list->tail;
add->rtype = type;
IRO_AddToList(add, list);
add->flags |= IROLF_Reffed;
// temp = add
tempobj = create_temp_object(type);
IRO_FindVar(tempobj, 1, 1);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EASS;
ass->u.diadic.left = IRO_TempReference(tempobj, list);
ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.right = add;
ass->u.diadic.right->flags |= IROLF_Reffed;
ass->rtype = type;
IRO_AddToList(ass, list);
return ass->u.diadic.left;
}
/*
 * Emit, at the end of `list`, IR that stores
 *     temp = (iterCount * step - step * unrollFactor) + ind
 * in a fresh temporary, where step is loop->induction->addConst and ind is
 * a duplicate of the operand of the induction update node.
 * Returns the left-hand (temporary reference) node of the assignment.
 */
static IROLinear *BuildUnrolledFinalvalue_DoWhile(IROLinear *iterCount, UInt32 unrollFactor, IROList *list, IROLoop *loop) {
IROLinear *addvalue_mult;
IROLinear *addvalue;
IROLinear *mul;
IROLinear *sub;
IROLinear *add;
IROLinear *ass;
Type *type;
Object *tempobj;
ENode *expr;
type = iterCount->rtype;
// Constant: step * unrollFactor.
addvalue_mult = IRO_NewLinear(IROLinearOperand);
addvalue_mult->index = ++IRO_NumLinear;
addvalue_mult->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, loop->induction->addConst * unrollFactor);
addvalue_mult->u.node = expr;
IRO_AddToList(addvalue_mult, list);
addvalue_mult->flags |= IROLF_Reffed;
// Constant: step.
addvalue = IRO_NewLinear(IROLinearOperand);
addvalue->index = ++IRO_NumLinear;
addvalue->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, loop->induction->addConst);
addvalue->u.node = expr;
IRO_AddToList(addvalue, list);
addvalue->flags |= IROLF_Reffed;
// iterCount * step; the duplicated iterCount reference is marked as not assigned.
mul = IRO_NewLinear(IROLinearOp2Arg);
mul->index = ++IRO_NumLinear;
mul->nodetype = EMUL;
mul->u.diadic.left = IRO_DuplicateExpr(iterCount, list);
mul->u.diadic.right = addvalue;
mul->rtype = type;
IRO_AddToList(mul, list);
mul->flags |= IROLF_Reffed;
mul->u.diadic.left->flags &= ~IROLF_Assigned;
mul->u.diadic.left->u.diadic.left->flags &= ~IROLF_Assigned;
// (iterCount * step) - (step * unrollFactor)
sub = IRO_NewLinear(IROLinearOp2Arg);
sub->index = ++IRO_NumLinear;
sub->nodetype = ESUB;
sub->u.diadic.left = mul;
sub->u.diadic.right = addvalue_mult;
sub->rtype = type;
IRO_AddToList(sub, list);
sub->flags |= IROLF_Reffed;
// Duplicate the induction operand; the copy is picked up through list->tail.
if (loop->induction->nd->type == IROLinearOp1Arg)
IRO_DuplicateExpr(loop->induction->nd->u.monadic, list);
else
IRO_DuplicateExpr(loop->induction->nd->u.diadic.left, list);
list->tail->flags &= ~IROLF_Assigned;
list->tail->u.diadic.left->flags &= ~IROLF_Assigned;
// sub + ind
add = IRO_NewLinear(IROLinearOp2Arg);
add->index = ++IRO_NumLinear;
add->nodetype = EADD;
add->u.diadic.left = sub;
add->u.diadic.right = list->tail;
add->rtype = type;
IRO_AddToList(add, list);
add->flags |= IROLF_Reffed;
// temp = add
tempobj = create_temp_object(type);
IRO_FindVar(tempobj, 1, 1);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EASS;
ass->u.diadic.left = IRO_TempReference(tempobj, list);
ass->u.diadic.left->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.left->u.monadic->flags |= IROLF_Assigned | IROLF_Ind;
ass->u.diadic.right = add;
ass->u.diadic.right->flags |= IROLF_Reffed;
ass->rtype = type;
IRO_AddToList(ass, list);
return ass->u.diadic.left;
}
/*
 * Emit, at the end of `list`, the comparison (iterCount > unrollFactor)
 * followed by an IROLinearIfNot node that tests it and carries `label`.
 * Presumably control transfers to `label` when the comparison is false,
 * i.e. when there are not more iterations than the unroll factor; confirm
 * against the IROLinearIfNot semantics.
 */
void BuildUnrolledBodyEntryTest(IROList *list, IROLinear *iterCount, UInt32 unrollFactor, CLabel *label) {
Type *type;
IROLinear *ifnot;
IROLinear *comp;
IROLinear *var;
IROLinear *value;
ENode *expr;
type = iterCount->rtype;
// Constant: unrollFactor.
value = IRO_NewLinear(IROLinearOperand);
value->index = ++IRO_NumLinear;
value->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, unrollFactor);
value->u.node = expr;
IRO_AddToList(value, list);
// iterCount > unrollFactor
var = IRO_DuplicateExpr(iterCount, list);
comp = IRO_NewLinear(IROLinearOp2Arg);
comp->index = ++IRO_NumLinear;
comp->nodetype = EGREATER;
comp->u.diadic.left = var;
comp->u.diadic.right = value;
comp->u.diadic.right->flags |= IROLF_Reffed;
comp->rtype = type;
IRO_AddToList(comp, list);
// Conditional branch on the comparison.
ifnot = IRO_NewLinear(IROLinearOp1Arg);
ifnot->index = ++IRO_NumLinear;
ifnot->type = IROLinearIfNot;
ifnot->u.label.x4 = comp;
ifnot->u.label.x4->flags |= IROLF_Reffed;
ifnot->rtype = type;
ifnot->u.label.label = label;
IRO_AddToList(ifnot, list);
}
/*
 * Walk the linear nodes from `first` through `last` (inclusive) and, for
 * every node that refers to the loop's induction variable object, add the
 * constant `val` to it.
 *
 * Two rewrites are used:
 *  - If the reference (optionally seen through one ETYPCON) feeds an ESHL
 *    or EMUL whose right operand is an integer constant, and that node is
 *    the right operand of an EADD that itself has a father, the constant is
 *    pre-scaled by the multiplier and added to the result of that EADD.
 *  - Otherwise the reference itself is replaced by (reference + val).
 */
void ChangeInductionReference(IROLinear *first, IROLinear *last, CInt64 val, IROLoop *loop) {
IROLinear *nd;
IROLinear *value;
IROLinear *add;
UInt32 isUnsigned;
IROLinear *father;
Boolean flag;
IROLinear *father2;
IROLinear *father3;
Type *tmp;
UInt32 flag2;
Object *varobj;
IROLinear *next;
ENode *expr;
Type *type;
CInt64 val2;
CInt64 val1;
IROList list;
type = loop->induction->nd->rtype;
isUnsigned = IRO_IsUnsignedType(type);
for (nd = first; nd; nd = next) {
// Remember the successor now: the list is edited around nd below.
next = nd->next;
varobj = IRO_IsVariable(nd);
if (varobj && loop->induction->var->object == varobj) {
// Constant operand holding val, and the EADD that will consume it.
value = IRO_NewLinear(IROLinearOperand);
value->index = ++IRO_NumLinear;
value->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
expr->data.intval = val;
value->u.node = expr;
add = IRO_NewLinear(IROLinearOp2Arg);
add->index = ++IRO_NumLinear;
add->nodetype = EADD;
add->rtype = type;
father = IRO_LocateFather(nd);
flag = 1;
// Look through a type conversion; the scaled rewrite is disabled when the
// conversion changes the type kind or narrows the value.
if (father && IS_LINEAR_MONADIC(father, ETYPCON)) {
tmp = father->rtype;
father = IRO_LocateFather(father);
if (tmp->type != nd->rtype->type || tmp->size < nd->rtype->size)
flag = 0;
}
flag2 = 0;
if (
flag &&
father &&
IS_LINEAR_DIADIC_2(father, ESHL, EMUL) &&
IRO_IsIntConstant(father->u.diadic.right) &&
(father2 = IRO_LocateFather(father)) &&
IS_LINEAR_DIADIC(father2, EADD) &&
father2->u.diadic.right == father &&
(father3 = IRO_LocateFather(father2))
)
{
// Scaled rewrite: multiply val by the scale factor (1 << n for ESHL)
// and add the product to the enclosing EADD (father2).
IRO_InitList(&list);
val2 = father->u.diadic.right->u.node->data.intval;
if (father->nodetype == ESHL)
val2 = CInt64_Shl(cint64_one, val2);
val1 = value->u.node->data.intval;
if (isUnsigned)
val1 = CInt64_MulU(val2, val1);
else
val1 = CInt64_Mul(val2, val1);
value->u.node->data.intval = val1;
IRO_AddToList(value, &list);
IRO_AddToList(add, &list);
add->u.diadic.right = value;
// The new nodes are pasted relative to father3; presumably the helper
// below then makes father3 refer to `add` in place of father2 (confirm
// against IRO_LocateFather_Cut_And_Paste_Without_Nopping).
IRO_Paste(list.head, list.tail, father3);
IRO_LocateFather_Cut_And_Paste_Without_Nopping(father2, add);
add->u.diadic.left = father2;
add->rtype = father2->rtype;
flag2 = 1;
}
if (!flag2) {
// Plain rewrite: splice "value, add" directly after nd so the list
// reads nd, value, add, next and `add` stands in for nd.
add->u.diadic.right = value;
add->u.diadic.right->flags |= IROLF_Reffed;
value->next = add;
add->u.diadic.left = nd;
IRO_LocateFather_Cut_And_Paste_Without_Nopping(nd, add);
add->flags |= IROLF_Reffed;
nd->next = value;
add->next = next;
}
}
if (nd == last)
break;
}
}
/*
 * Build a compound update of the loop's induction variable by the constant
 * (value * loop->induction->addConst) and paste it relative to `before`
 * with IRO_Paste. EPREINC/EPOSTINC and EADDASS updates yield an EADDASS;
 * EPREDEC/EPOSTDEC updates yield an ESUBASS.
 *
 * Returns the tail of the pasted list (the new assignment node).
 *
 * NOTE(review): if the induction node matches none of the handled forms,
 * only the constant operand is built and pasted, and it is what gets returned.
 */
IROLinear *UpdateInductionIncrement(IROLoop *loop, SInt32 value, IROLinear *before) {
IROLinear *ind_nd;
IROLinear *addvalue;
IROLinear *ass;
Type *type;
ENode *expr;
IROList list;
IRO_InitList(&list);
ind_nd = loop->induction->nd;
type = ind_nd->rtype;
// Constant: value * step.
addvalue = IRO_NewLinear(IROLinearOperand);
addvalue->index = ++IRO_NumLinear;
addvalue->rtype = type;
expr = IRO_NewENode(EINTCONST);
expr->rtype = type;
CInt64_SetLong(&expr->data.intval, value * loop->induction->addConst);
addvalue->u.node = expr;
IRO_AddToList(addvalue, &list);
if (IS_LINEAR_MONADIC_2(ind_nd, EPREINC, EPOSTINC)) {
// ++i / i++  ->  i += constant
ind_nd = IRO_DuplicateExpr(ind_nd->u.monadic, &list);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EADDASS;
ass->u.diadic.left = ind_nd;
ass->u.diadic.right = addvalue;
ass->rtype = type;
IRO_AddToList(ass, &list);
} else if (IS_LINEAR_MONADIC_2(ind_nd, EPREDEC, EPOSTDEC)) {
// --i / i--  ->  i -= constant
ind_nd = IRO_DuplicateExpr(ind_nd->u.monadic, &list);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = ESUBASS;
ass->u.diadic.left = ind_nd;
ass->u.diadic.right = addvalue;
ass->rtype = type;
IRO_AddToList(ass, &list);
} else if (IS_LINEAR_DIADIC(ind_nd, EADDASS)) {
// i += c  ->  i += constant
// NOTE(review): a diadic node is read through u.monadic here; this presumably
// aliases u.diadic.left in the union. Confirm against the IROLinear definition.
ind_nd = IRO_DuplicateExpr(ind_nd->u.monadic, &list);
ass = IRO_NewLinear(IROLinearOp2Arg);
ass->index = ++IRO_NumLinear;
ass->nodetype = EADDASS;
ass->u.diadic.left = ind_nd;
ass->u.diadic.right = addvalue;
ass->rtype = type;
IRO_AddToList(ass, &list);
}
IRO_Paste(list.head, list.tail, before);
return list.tail;
}
/*
 * Append to `list` the assignment
 *     var = <right-hand side of loop->nd14>
 * typed like the loop's induction update node. loop->nd14 must exist and
 * be a two-operand node.
 */
void GenInitialAssignment(IROLoop *loop, Object *var, IROList *list) {
    IROLinear *assign;

    CError_ASSERT(3924, loop->nd14 && loop->nd14->type == IROLinearOp2Arg);

    assign = IRO_NewLinear(IROLinearOp2Arg);
    assign->index = ++IRO_NumLinear;
    assign->nodetype = EASS;
    assign->rtype = loop->induction->nd->rtype;

    /* The operands go into the list first, then the assignment itself. */
    assign->u.diadic.left = IRO_TempReference(var, list);
    assign->u.diadic.right = IRO_DuplicateExpr(loop->nd14->u.diadic.right, list);
    IRO_AddToList(assign, list);
}
/* Not implemented: reaching this function raises a fatal compiler error. */
void GenNewInduction(void) {
CError_FATAL(3941);
}