move lots of source files around to match their actual placement in the original tree

This commit is contained in:
Ash Wolf
2023-01-26 11:30:47 +00:00
parent fc0c4c0df7
commit 094b96ca1d
120 changed files with 400 additions and 392 deletions

View File

@@ -0,0 +1,747 @@
#include "compiler/Alias.h"
#include "compiler/CClass.h"
#include "compiler/CError.h"
#include "compiler/CParser.h"
#include "compiler/CMachine.h"
#include "compiler/CodeGen.h"
#include "compiler/CopyPropagation.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/RegisterInfo.h"
#include "compiler/UseDefChains.h"
#include "compiler/ValueNumbering.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/objects.h"
#include "compiler/types.h"
static Alias *aliases;
static int n_aliases;
static int n_gathered_aliases;
static Alias *alias_hash[997];
Alias *worst_case;
Object worst_case_obj;
static TypePointer worst_case_memory_type = {
TYPEARRAY,
0xFFFFFF,
TYPE(&stchar)
};
static Boolean is_safe_const(Object *obj) {
Type *type;
type = obj->type;
while (IS_TYPE_ARRAY(type))
type = TPTR_TARGET(type);
if (TYPE_FITS_IN_REGISTER(type) || IS_TYPE_VECTOR(type) || IS_TYPE_FLOAT(type) || IS_TYPE_STRUCT(type))
return is_const_object(obj);
if (IS_TYPE_CLASS(type))
return is_const_object(obj) && CClass_IsPODClass(TYPE_CLASS(type));
return 0;
}
void initialize_aliases(void) {
int i;
memclrw(&worst_case_obj, sizeof(Object));
worst_case_obj.otype = OT_OBJECT;
worst_case_obj.type = TYPE(&worst_case_memory_type);
worst_case_obj.datatype = DDATA;
worst_case_obj.name = GetHashNameNodeExport("@worst_case@");
aliases = NULL;
n_aliases = 0;
n_gathered_aliases = 0;
for (i = 0; i < 997; i++)
alias_hash[i] = NULL;
worst_case = make_alias_set();
add_alias_member(worst_case, make_alias(&worst_case_obj, 0, 0));
}
static UInt32 hash_alias(Object *object, SInt32 offset, SInt32 size) {
return (UInt32) (object->name->hashval * offset * size) % 997;
}
static Alias *create_alias(AliasType type, Object *object, SInt32 offset, SInt32 size, Boolean addToHash) {
Alias *alias;
UInt32 hash;
alias = lalloc(sizeof(Alias));
memclrw(alias, sizeof(Alias));
alias->type = type;
alias->index = n_aliases++;
alias->next = aliases;
aliases = alias;
alias->object = object;
alias->offset = offset;
alias->size = size;
if (addToHash) {
hash = hash_alias(object, offset, size);
alias->hashNext = alias_hash[hash];
alias_hash[hash] = alias;
}
return alias;
}
static Alias *lookup_alias(Object *object, SInt32 offset, SInt32 size) {
Alias *scan;
for (scan = alias_hash[hash_alias(object, offset, size)]; scan; scan = scan->hashNext) {
if (scan->object == object && scan->offset == offset && scan->size == size)
return scan;
}
return NULL;
}
Alias *make_alias(Object *object, SInt32 offset, SInt32 size) {
Alias *alias;
Alias *alias2;
if (!offset && !size)
size = object->type->size;
alias = lookup_alias(object, offset, size);
if (!alias) {
if (offset > 0 || size != object->type->size) {
alias2 = make_alias(object, 0, object->type->size);
alias = create_alias(AliasType1, object, offset, size, 1);
add_alias_member(alias2, alias);
} else {
alias = create_alias(AliasType0, object, offset, size, 1);
}
switch (object->datatype) {
case DLOCAL:
case DNONLAZYPTR:
break;
default:
if (!is_safe_const(object))
add_alias_member(worst_case, make_alias(object, 0, 0));
}
}
if (offset > object->type->size)
return NULL;
else
return alias;
}
Alias *make_alias_set(void) {
return create_alias(AliasType2, NULL, 0, 0, 0);
}
void add_alias_member(Alias *parent, Alias *child) {
AliasMember *member;
if (child->type == AliasType2) {
for (member = child->parents; member; member = member->nextParent)
add_alias_member(parent, member->child);
} else {
if (parent == worst_case && child->type == AliasType1)
child = make_alias(child->object, 0, 0);
for (member = parent->parents; member; member = member->nextParent) {
if (member->child == child)
return;
}
member = lalloc(sizeof(AliasMember));
member->parent = parent;
member->child = child;
member->nextParent = parent->parents;
parent->parents = member;
member->nextChild = child->children;
child->children = member;
}
}
Alias *make_alias_set_from_IR(void) {
CError_FATAL(333);
return NULL;
}
static Boolean aliases_overlap(Alias *a, Alias *b) {
return (
a->offset == b->offset ||
(a->offset > b->offset && a->offset < (b->offset + b->size)) ||
(b->offset > a->offset && b->offset < (a->offset + a->size))
);
}
static int is_address_load(PCode *pcode) {
Object *obj;
switch (pcode->op) {
case PC_LWZ:
if (pcode->args[2].kind == PCOp_MEMORY && pcode->args[2].data.mem.obj->datatype == DNONLAZYPTR)
return 1;
break;
case PC_LBZU:
case PC_LBZUX:
case PC_LHZU:
case PC_LHZUX:
case PC_LHAU:
case PC_LHAUX:
case PC_LWZU:
case PC_LWZUX:
case PC_STBU:
case PC_STBUX:
case PC_STHU:
case PC_STHUX:
case PC_STWU:
case PC_STWUX:
return 1;
case PC_ADDI:
case PC_ADDIS:
if (pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) {
if (pcode->args[2].kind == PCOp_MEMORY) {
obj = pcode->args[2].data.mem.obj;
if (obj->datatype == DLOCAL && !is_safe_const(obj))
add_alias_member(worst_case, make_alias(obj, 0, 0));
return 0;
}
} else {
return 1;
}
break;
case PC_ADD:
return 1;
}
return 0;
}
static int addresspropagatestouse(int candidateID, int useID) {
PCode *candidate_pcode; // r30
PCode *use_pcode; // r29
int reg; // r28
short reg2;
Object *object; // r27
SInt32 offset; // r26
Alias *alias; // r25
Boolean flag24; // r24
SInt32 size; // r23
Alias *aliasSet; // r22
int i;
PCode *scan;
PCodeArg *op;
candidate_pcode = Candidates[candidateID].pcode;
use_pcode = Uses[useID].pcode;
flag24 = 0;
size = 1;
reg = candidate_pcode->args[0].data.reg.reg;
if (candidate_pcode->alias && (candidate_pcode->alias->type == AliasType0 || candidate_pcode->alias->type == AliasType1)) {
object = candidate_pcode->alias->object;
offset = candidate_pcode->alias->offset;
if (offset == 0 && candidate_pcode->alias->size == object->type->size)
flag24 = 1;
} else if (candidate_pcode->args[2].kind == PCOp_MEMORY) {
object = candidate_pcode->args[2].data.mem.obj;
if (candidate_pcode->op == PC_ADDIS)
offset = candidate_pcode->args[2].data.mem.offset << 16;
else
offset = candidate_pcode->args[2].data.mem.offset;
} else {
return 0;
}
CError_ASSERT(478, object->otype == OT_OBJECT);
if ((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr)) {
reg = candidate_pcode->args[1].data.reg.reg;
offset = 0;
flag24 = 1;
} else if (candidate_pcode->op == PC_LWZ) {
if (object->datatype != DNONLAZYPTR)
return 0;
object = object->u.var.realObj;
CError_ASSERT(495, object->otype == OT_OBJECT);
offset = 0;
} else if (candidate_pcode->op == PC_ADDI) {
if (!candidate_pcode->alias && object)
candidate_pcode->alias = make_alias(object, offset, 1);
} else if (candidate_pcode->op == PC_ADDIS) {
if (!candidate_pcode->alias && object)
candidate_pcode->alias = make_alias(object, offset, 1);
} else if (candidate_pcode->op == PC_ADD) {
offset = 0;
flag24 = 1;
} else {
CError_FATAL(509);
}
if (
!(use_pcode->flags & (fIsRead | fIsWrite)) &&
use_pcode->op != PC_ADDI &&
use_pcode->op != PC_ADD &&
use_pcode->op != PC_ADDIS
) {
if (object->datatype == DLOCAL && !is_safe_const(object))
add_alias_member(worst_case, make_alias(object, 0, 0));
return 1;
}
if (
(use_pcode->flags & (fIsWrite | fPCodeFlag40000)) &&
use_pcode->args[0].kind == PCOp_REGISTER &&
use_pcode->args[0].arg == RegClass_GPR &&
use_pcode->args[0].data.reg.reg == reg &&
object->datatype == DLOCAL &&
!is_safe_const(object)
)
add_alias_member(worst_case, make_alias(object, 0, 0));
if (use_pcode->argCount < 3)
return 1;
CError_ASSERT(543, use_pcode->args[1].kind == PCOp_REGISTER);
if (candidate_pcode->block == use_pcode->block && precedes(candidate_pcode, use_pcode)) {
for (scan = candidate_pcode->nextPCode; scan && scan != use_pcode; scan = scan->nextPCode) {
for (op = scan->args, i = scan->argCount; i--; op++) {
if (op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg)
return 1;
}
}
} else {
if (!bitvectorgetbit(candidateID, propinfo[use_pcode->block->blockIndex].vec8)) {
if (bitvectorgetbit(candidate_pcode->defID, usedefinfo[use_pcode->block->blockIndex].defvec8)) {
for (scan = use_pcode->block->firstPCode; scan && scan != use_pcode; scan = scan->nextPCode) {
for (op = scan->args, i = scan->argCount; i--; op++) {
if (op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg)
return 1;
}
}
} else {
return 1;
}
}
for (scan = use_pcode->block->firstPCode; scan; scan = scan->nextPCode) {
if (scan == use_pcode)
break;
for (op = scan->args, i = scan->argCount; i--; op++) {
if (op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg)
return 1;
}
}
}
CError_ASSERT(598, object != NULL);
if (use_pcode->op == PC_ADDI || use_pcode->op == PC_ADD || use_pcode->op == PC_ADDIS) {
if (use_pcode->args[0].data.reg.reg < n_real_registers[RegClass_GPR] && !is_safe_const(object))
add_alias_member(worst_case, make_alias(object, 0, 0));
}
if (use_pcode->flags & (fIsRead | fIsWrite))
size = nbytes_loaded_or_stored_by(use_pcode);
if (use_pcode->args[2].kind == PCOp_REGISTER) {
if (use_pcode->args[1].data.reg.reg == 0) {
if (use_pcode->args[2].data.reg.reg == reg)
alias = make_alias(object, offset, size);
} else {
if (use_pcode->args[1].data.reg.reg == reg)
reg2 = use_pcode->args[2].data.reg.reg;
else if (use_pcode->args[2].data.reg.reg == reg)
reg2 = use_pcode->args[1].data.reg.reg;
else
return 1;
for (scan = use_pcode->prevPCode; scan; scan = scan->prevPCode) {
if (scan->op == PC_LI && scan->args[0].data.reg.reg == reg2)
break;
for (i = 0; i < scan->argCount; i++) {
if (scan->args[i].kind == PCOp_REGISTER &&
scan->args[i].arg == RegClass_GPR &&
scan->args[i].data.reg.reg == reg2 &&
(scan->args[i].data.reg.effect & EffectWrite)) {
scan = NULL;
break;
}
}
if (!scan)
break;
}
if (scan) {
offset += scan->args[1].data.mem.offset;
alias = make_alias(object, offset, size);
} else {
alias = make_alias(object, 0, 0);
}
}
} else {
if (use_pcode->args[1].kind != PCOp_REGISTER ||
use_pcode->args[1].arg != RegClass_GPR ||
use_pcode->args[1].data.reg.reg != reg)
return 1;
if (use_pcode->args[1].data.reg.effect & EffectWrite) {
alias = make_alias(object, 0, 0);
} else if (use_pcode->args[2].kind == PCOp_IMMEDIATE) {
if (use_pcode->op == PC_ADDIS) {
offset += use_pcode->args[2].data.imm.value << 16;
alias = make_alias(object, offset, 1);
} else {
offset += use_pcode->args[2].data.imm.value;
alias = make_alias(object, offset, size);
}
} else {
return 1;
}
}
if (flag24)
alias = make_alias(object, 0, 0);
if (!alias)
return 1;
if (!use_pcode->alias) {
if (
use_pcode->op == PC_ADDI ||
use_pcode->op == PC_ADD ||
use_pcode->op == PC_ADDIS ||
((candidate_pcode->flags & (fIsRead | fIsWrite)) && (candidate_pcode->flags & fUpdatesPtr))
)
recursive_propagation = 1;
}
if (use_pcode->alias) {
if (use_pcode->alias == worst_case) {
add_alias_member(worst_case, make_alias(object, 0, 0));
} else if (use_pcode->alias == alias) {
return 1;
} else if (use_pcode->alias->type == AliasType0 || use_pcode->alias->type == AliasType1) {
if (object == use_pcode->alias->object) {
use_pcode->alias = make_alias(object, 0, 0);
} else {
aliasSet = make_alias_set();
if (
use_pcode->op == PC_ADDI ||
use_pcode->op == PC_ADD ||
use_pcode->op == PC_ADDIS ||
((use_pcode->flags & (fIsRead | fIsWrite)) && (use_pcode->flags & fUpdatesPtr))
) {
if (alias->type == AliasType2)
add_alias_member(worst_case, alias);
else
add_alias_member(worst_case, make_alias(use_pcode->alias->object, 0, 0));
}
add_alias_member(aliasSet, use_pcode->alias);
add_alias_member(aliasSet, alias);
use_pcode->alias = aliasSet;
}
} else {
add_alias_member(use_pcode->alias, alias);
}
} else {
use_pcode->alias = alias;
}
propagated_instructions = 1;
return 1;
}
static void finishpropagatealiases(int id) {
propagated_instructions = 1;
}
static Propagation alias_prop = {
&is_address_load,
&addresspropagatestouse,
&finishpropagatealiases,
"ALIAS",
"ALIASES",
"A%" PRId32,
1
};
static void propagatealiasinfo(Object *proc) {
propagateinstructions(proc, &alias_prop, (copts.optimizationlevel >= 4) ? 4 : 1, 1);
}
void gather_alias_info(void) {
UInt32 *myvec; // r31
Alias *alias; // r22
AliasMember *member;
AliasMember *member2;
PCodeBlock *block; // r21
PCode *pcode; // r20
PCodeArg *op; // r19
RegUseOrDef *list; // r18
int i; // r17
Alias *alias_choice; // r16
int aliases_idx; // r15 (helper in r23)
PCode *defpcode; // r14
Alias *alias_array[3];
UseOrDef *def;
int defID;
if (coloring) {
propagatealiasinfo(gFunction);
myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
for (block = pcbasicblocks; block; block = block->nextBlock) {
bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs);
for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) {
if (!pcode->alias) {
pcode->alias = worst_case;
} else {
if ((pcode->alias->type == AliasType0 || pcode->alias->type == AliasType1) &&
pcode->alias->size == nbytes_loaded_or_stored_by(pcode)) {
pcode->flags &= ~fIsPtrOp;
} else {
pcode->flags |= fIsPtrOp;
}
if (pcode->alias != worst_case) {
aliases_idx = 0;
alias_choice = NULL;
op = pcode->args;
for (i = 0; i < pcode->argCount; i++, op++) {
if (
(!(pcode->flags & (fIsWrite | fPCodeFlag40000)) || op != pcode->args) &&
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectRead)
) {
alias_array[aliases_idx] = NULL;
if (aliases_idx >= 2) {
alias_choice = worst_case;
break;
}
alias_array[aliases_idx] = pcode->alias;
for (list = reg_Defs[RegClass_GPR][op->data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(list->id, myvec)) {
defpcode = Defs[list->id].pcode;
if (!defpcode->alias || !is_address_load(defpcode) || defpcode->alias == worst_case) {
alias_array[aliases_idx] = worst_case;
break;
}
}
}
aliases_idx++;
}
}
if (!alias_choice) {
if (aliases_idx > 0) {
alias_choice = alias_array[0];
if (aliases_idx == 2) {
if (alias_array[0] != worst_case) {
if (alias_array[1] != worst_case)
alias_choice = worst_case;
} else if (alias_array[1] != worst_case) {
alias_choice = alias_array[1];
}
}
}
if (alias_choice == worst_case) {
pcode->flags |= fIsPtrOp;
if (pcode->alias->type == AliasType2)
add_alias_member(worst_case, pcode->alias);
else
add_alias_member(worst_case, make_alias(pcode->alias->object, 0, 0));
}
if (alias_choice)
pcode->alias = alias_choice;
}
}
}
} else {
if ((pcode->flags & fIsCall) && !pcode->alias)
pcode->alias = worst_case;
}
for (def = &Defs[defID = pcode->defID]; defID < number_of_Defs && def->pcode == pcode; defID++, def++) {
if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) {
for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next)
bitvectorclearbit(list->id, myvec);
}
bitvectorsetbit(defID, myvec);
}
}
}
freeoheap();
} else {
for (block = pcbasicblocks; block; block = block->nextBlock) {
for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
if ((pcode->flags & (fIsRead | fIsWrite | fIsCall | fPCodeFlag20000 | fPCodeFlag40000)) && !pcode->alias)
pcode->alias = worst_case;
}
}
}
if (n_gathered_aliases != n_aliases) {
for (alias = aliases; alias; alias = alias->next) {
if (alias->type == AliasType2) {
alias->vec24 = lalloc(4 * ((n_aliases + 31) >> 5));
bitvectorinitialize(alias->vec24, n_aliases, 0);
for (member = alias->parents; member; member = member->nextParent) {
bitvectorsetbit(member->child->index, alias->vec24);
for (member2 = member->child->parents; member2; member2 = member2->nextParent)
bitvectorsetbit(member2->child->index, alias->vec24);
}
}
}
n_gathered_aliases = n_aliases;
}
}
static Boolean may_alias_alias(Alias *a, Alias *b) {
switch ((a->type * 3) + b->type) {
case (AliasType0 * 3) + AliasType0:
return a == b;
case (AliasType0 * 3) + AliasType1:
case (AliasType1 * 3) + AliasType0:
return a->object == b->object;
case (AliasType1 * 3) + AliasType1:
return (a->object == b->object) && aliases_overlap(a, b);
case (AliasType0 * 3) + AliasType2:
case (AliasType1 * 3) + AliasType2:
return bitvectorgetbit(a->index, b->vec24) != 0;
case (AliasType2 * 3) + AliasType0:
case (AliasType2 * 3) + AliasType1:
return bitvectorgetbit(b->index, a->vec24) != 0;
case (AliasType2 * 3) + AliasType2:
return (a == b) || !bitvectorintersectionisempty(a->vec24, b->vec24, n_aliases);
default:
CError_FATAL(1054);
return 1;
}
}
Boolean may_alias(PCode *a, PCode *b) {
return may_alias_alias(a->alias, b->alias);
}
Boolean uniquely_aliases(PCode *a, PCode *b) {
if (may_alias_alias(a->alias, b->alias)) {
if (
a->alias->type != AliasType2 &&
b->alias->type != AliasType2 &&
a->alias &&
b->alias &&
a->alias->size == nbytes_loaded_or_stored_by(a) &&
b->alias->size == nbytes_loaded_or_stored_by(b)
)
return 1;
}
return 0;
}
Boolean may_alias_worst_case(PCode *pcode) {
return may_alias_alias(pcode->alias, worst_case);
}
Boolean may_alias_object(PCode *pcode, Object *object) {
return may_alias_alias(pcode->alias, make_alias(object, 0, 0));
}
void initialize_alias_values(void) {
Alias *alias;
for (alias = aliases; alias; alias = alias->next) {
alias->valuenumber = nextvaluenumber++;
alias->valuepcode = NULL;
}
}
void update_alias_value(Alias *alias, PCode *pcode) {
AliasMember *member;
AliasMember *member2;
AliasMember *member3;
switch (alias->type) {
case AliasType0:
killmemory(alias, pcode);
for (member = alias->children; member; member = member->nextChild) {
CError_ASSERT(1152, member->parent->type == AliasType2);
killmemory(member->parent, NULL);
}
for (member = alias->parents; member; member = member->nextParent) {
CError_ASSERT(1157, member->child->type == AliasType1);
killmemory(member->child, NULL);
for (member2 = member->child->children; member2; member2 = member2->nextChild) {
if (member2->parent != alias) {
CError_ASSERT(1163, member2->parent->type == AliasType2);
killmemory(member2->parent, NULL);
}
}
}
break;
case AliasType1:
killmemory(alias, pcode);
for (member = alias->children; member; member = member->nextChild) {
killmemory(member->parent, NULL);
if (member->parent->type == AliasType0) {
for (member2 = member->parent->parents; member2; member2 = member2->nextParent) {
if (member2->child != alias && aliases_overlap(alias, member2->child)) {
killmemory(member2->child, NULL);
}
}
}
}
break;
case AliasType2:
killmemory(alias, NULL);
for (member = alias->parents; member; member = member->nextParent) {
killmemory(member->child, NULL);
for (member2 = member->child->children; member2; member2 = member2->nextChild) {
if (member2->parent != alias)
killmemory(member2->parent, NULL);
}
for (member3 = member->child->parents; member3; member3 = member3->nextParent) {
killmemory(member3->child, NULL);
for (member2 = member3->child->children; member2; member2 = member2->nextChild) {
if (member2->parent != member->child)
killmemory(member2->parent, NULL);
}
}
}
break;
}
}
void update_all_alias_values(void) {
Alias *alias;
for (alias = aliases; alias; alias = alias->next)
killmemory(alias, NULL);
}

View File

@@ -0,0 +1,906 @@
#include "compiler/CodeMotion.h"
#include "compiler/Alias.h"
#include "compiler/BitVectors.h"
#include "compiler/LoopDetection.h"
#include "compiler/LoopOptimization.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/UseDefChains.h"
#include "compiler/RegisterInfo.h"
int movedloopinvariantcode;
int unswitchedinvariantcode;
static int isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec, int flag1, int flag2) {
PCodeArg *op;
RegUseOrDef *list;
int i;
if (pcode->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000)) {
if (pcode->alias) {
if (pcode->alias->type == AliasType2 || (pcode->flags & (fIsVolatile | fSideEffects)))
return 0;
if (pcode->flags & fIsRead) {
for (list = findobjectusedef(pcode->alias->object)->defs; list; list = list->next) {
if (
may_alias(pcode, Defs[list->id].pcode) &&
bitvectorgetbit(list->id, vec) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
if (pcode->flags & fIsWrite) {
for (list = findobjectusedef(pcode->alias->object)->uses; list; list = list->next) {
if (
may_alias(pcode, Uses[list->id].pcode) &&
bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
} else {
return 0;
}
}
if ((pcode->flags & fIsWrite) && !bitvectorgetbit(pcode->block->blockIndex, loop->vec2C))
return 0;
op = pcode->args;
i = pcode->argCount;
while (i--) {
switch (op->kind) {
case PCOp_MEMORY:
if ((pcode->flags & fIsRead) && ((pcode->flags == 0) & 0x40)) {
for (list = findobjectusedef(op->data.mem.obj)->defs; list; list = list->next) {
if (
may_alias(pcode, Defs[list->id].pcode) &&
bitvectorgetbit(list->id, vec) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
if (pcode->flags & fIsWrite) {
for (list = findobjectusedef(op->data.mem.obj)->uses; list; list = list->next) {
if (
may_alias(pcode, Uses[list->id].pcode) &&
bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
break;
case PCOp_REGISTER:
if (op->data.reg.effect & (EffectRead | EffectWrite)) {
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
if (op->data.reg.reg == _FP_)
break;
if (op->data.reg.reg == _CALLER_SP_)
break;
if (op->data.reg.reg == 2)
break;
}
if (op->data.reg.reg < n_real_registers[op->arg]) {
if (op->arg == RegClass_CRFIELD) {
if (!flag2 || (op->data.reg.effect & EffectRead))
return 0;
} else if (op->arg == RegClass_SPR) {
if (!flag1)
return 0;
} else {
return 0;
}
} else if (op->data.reg.effect & EffectRead) {
if (flag1 && op->kind == PCOp_REGISTER && op->arg == RegClass_SPR)
break;
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR) {
if (op->data.reg.reg == _FP_)
break;
if (op->data.reg.reg == _CALLER_SP_)
break;
if (op->data.reg.reg == 2)
break;
}
for (list = reg_Defs[op->arg][op->data.reg.reg]; list; list = list->next) {
if (
bitvectorgetbit(list->id, vec) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 0;
}
}
}
break;
}
op++;
}
return 1;
}
static int isuniquedefinition(PCode *pcode, Loop *loop) {
RegUseOrDef *list;
int defID;
UseOrDef *def;
defID = pcode->defID;
def = &Defs[defID];
if (defID >= number_of_Defs)
return 0;
if (def->pcode != pcode)
return 0;
if ((defID + 1) < number_of_Defs && def[1].pcode == pcode)
return 0;
if (def->v.kind == PCOp_REGISTER) {
for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) {
if (
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) &&
list->id != defID
)
return 0;
}
} else if (def->v.kind == PCOp_MEMORY) {
for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
if (
may_alias(pcode, Defs[list->id].pcode) &&
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) &&
list->id != defID
)
return 0;
}
} else {
CError_FATAL(292);
}
return 1;
}
static int uniquelyreachesuse(int defID, int useID) {
UseOrDef *def;
UseOrDef *use;
RegUseOrDef *list;
PCode *pcode;
def = &Defs[defID];
use = &Uses[useID];
if (def->v.kind == PCOp_REGISTER) {
for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next) {
if (
list->id != defID &&
bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8)
)
break;
}
} else if (def->v.kind == PCOp_MEMORY) {
for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
if (
may_alias(def->pcode, Defs[list->id].pcode) &&
list->id != defID &&
bitvectorgetbit(list->id, usedefinfo[use->pcode->block->blockIndex].defvec8)
)
break;
}
}
if (!list)
return 1;
if (def->pcode->block == use->pcode->block) {
for (pcode = use->pcode->prevPCode; pcode; pcode = pcode->prevPCode) {
if (pcode == def->pcode)
return 1;
}
}
return 0;
}
static int uniquelyreachesalluses(int defID, Loop *loop) {
UseOrDef *def;
RegUseOrDef *list;
def = &Defs[defID];
if (def->v.kind == PCOp_REGISTER) {
for (list = reg_Uses[def->v.arg][def->v.u.reg]; list; list = list->next) {
if (
bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) ||
(bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id))
)
return 0;
}
} else if (def->v.kind == PCOp_MEMORY) {
for (list = findobjectusedef(def->v.u.object)->uses; list; list = list->next) {
if (may_alias(def->pcode, Uses[list->id].pcode)) {
if (
bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].usevec1C) ||
(bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks) && !uniquelyreachesuse(defID, list->id))
)
return 0;
}
}
} else {
CError_FATAL(382);
}
return 1;
}
static int isliveonexit(TinyValue *v, Loop *loop) {
RegUseOrDef *list;
UInt32 *vec;
vec = usedefinfo[loop->preheader->blockIndex].usevec1C;
if (v->kind == PCOp_REGISTER) {
for (list = reg_Uses[v->arg][v->u.reg]; list; list = list->next) {
if (
bitvectorgetbit(list->id, vec) &&
!bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 1;
}
} else if (v->kind == PCOp_MEMORY) {
for (list = findobjectusedef(v->u.object)->uses; list; list = list->next) {
if (
bitvectorgetbit(list->id, vec) &&
!bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)
)
return 1;
}
}
return 0;
}
static int dominatesallexits(PCode *pcode, Loop *loop) {
return bitvectorgetbit(pcode->block->blockIndex, loop->vec28) != 0;
}
static int maymove(PCode *pcode, Loop *loop) {
short reg;
if (!isuniquedefinition(pcode, loop))
return 0;
if (!uniquelyreachesalluses(pcode->defID, loop))
return 0;
if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop))
return 0;
if (loop->bodySize > 25) {
switch (pcode->op) {
case PC_LI:
if (
pcode->nextPCode &&
pcode->nextPCode->op == PC_LVX &&
(pcode->nextPCode->flags & fIsConst)
) {
reg = pcode->args[0].data.reg.reg;
if (pcode->nextPCode->args[1].data.reg.reg == reg ||
pcode->nextPCode->args[2].data.reg.reg == reg)
return 1;
}
case PC_VSPLTISB:
case PC_VSPLTISH:
case PC_VSPLTISW:
return 0;
default:
if (!bitvectorgetbit(pcode->block->blockIndex, loop->vec2C))
return 0;
}
}
return 1;
}
static void moveinvariantcomputation(PCode *pcode, Loop *loop) {
ObjectUseDef *oud;
BlockList *blocklist;
RegUseOrDef *list;
UseOrDef *def;
int defID;
defID = pcode->defID;
def = &Defs[defID];
deletepcode(pcode);
insertpcodebefore(loop->preheader->lastPCode, pcode);
loop->bodySize--;
movedloopinvariantcode = 1;
if (def->v.kind == PCOp_REGISTER) {
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next)
bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8);
bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8);
}
} else if (def->v.kind == PCOp_MEMORY) {
oud = findobjectusedef(def->v.u.object);
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
for (list = oud->defs; list; list = list->next) {
if (uniquely_aliases(pcode, Defs[list->id].pcode))
bitvectorclearbit(list->id, usedefinfo[blocklist->block->blockIndex].defvec8);
}
bitvectorsetbit(defID, usedefinfo[blocklist->block->blockIndex].defvec8);
}
} else {
CError_FATAL(545);
}
}
static int srawi_addze_maymove(PCode *pcode, Loop *loop) {
RegUseOrDef *list;
UseOrDef *def;
int defID;
int nextDefID;
defID = pcode->defID;
nextDefID = pcode->nextPCode->defID;
def = &Defs[defID];
if (defID >= number_of_Defs)
return 0;
if (def->pcode != pcode)
return 0;
if ((defID + 1) < number_of_Defs && def[1].pcode == pcode)
return 0;
if (def->v.kind == PCOp_REGISTER && def->v.arg == RegClass_GPR) {
for (list = reg_Defs[RegClass_GPR][def->v.u.reg]; list; list = list->next) {
if (
bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks) &&
list->id != defID &&
list->id != nextDefID
)
return 0;
}
} else {
CError_FATAL(582);
}
if (!uniquelyreachesalluses(pcode->defID, loop))
return 0;
if (!dominatesallexits(pcode, loop) && isliveonexit(&Defs[pcode->defID].v, loop))
return 0;
if (!dominatesallexits(pcode->nextPCode, loop) && isliveonexit(&Defs[pcode->nextPCode->defID].v, loop))
return 0;
return 1;
}
static int srawi_addze_isloopinvariant(PCode *pcode, Loop *loop, UInt32 *vec) {
static PCode *oldNextInstr;
PCode *nextInstr;
nextInstr = pcode->nextPCode;
if (
pcode->op == PC_ADDZE &&
oldNextInstr == pcode
) {
oldNextInstr = NULL;
return 1;
} else if (
pcode->op == PC_SRAWI &&
nextInstr &&
nextInstr->op == PC_ADDZE &&
pcode->args[0].data.reg.reg == nextInstr->args[0].data.reg.reg &&
nextInstr->args[0].data.reg.reg == nextInstr->args[1].data.reg.reg &&
!(pcode->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
!(nextInstr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
isloopinvariant(pcode, loop, vec, 1, 0) &&
srawi_addze_maymove(pcode, loop)
) {
oldNextInstr = nextInstr;
return 1;
} else {
oldNextInstr = NULL;
return 0;
}
}
static void removeblockfromloop(Loop *loop, PCodeBlock *block) {
BlockList *list;
BlockList **ptr;
bitvectorclearbit(block->blockIndex, loop->memberblocks);
bitvectorclearbit(block->blockIndex, loop->vec24);
bitvectorclearbit(block->blockIndex, loop->vec28);
bitvectorclearbit(block->blockIndex, loop->vec2C);
loop->bodySize -= block->pcodeCount;
ptr = &loop->blocks;
while ((list = *ptr)) {
if (list->block == block)
*ptr = list->next;
else
ptr = &list->next;
}
}
static void changesuccessor(PCodeBlock *block, PCodeBlock *from, PCodeBlock *to) {
PCLink **ptr;
PCLink *link;
for (link = block->successors; link; link = link->nextLink) {
if (link->block == from)
link->block = to;
}
ptr = &from->predecessors;
while ((link = *ptr)) {
if (link->block == block) {
*ptr = link->nextLink;
link->nextLink = to->predecessors;
to->predecessors = link;
} else {
ptr = &link->nextLink;
}
}
}
static void movesuccessor(PCodeBlock *to, PCodeBlock *from, PCodeBlock *block) {
PCLink **ptr;
PCLink *link;
for (link = block->predecessors; link; link = link->nextLink) {
if (link->block == from)
link->block = to;
}
ptr = &from->successors;
while ((link = *ptr)) {
if (link->block == block) {
*ptr = link->nextLink;
link->nextLink = to->successors;
to->successors = link;
} else {
ptr = &link->nextLink;
}
}
}
static void movecmptopreheader(Loop *loop, PCodeBlock *block, PCode *pc1, PCode *pc2, PCodeArg *op) {
PCodeBlock *preheader;
PCode *pc3;
preheader = loop->preheader;
if (PCODE_FLAG_SET_F(pc1) & fRecordBit) {
moveinvariantcomputation(pc1, loop);
} else {
deletepcode(pc1);
insertpcodebefore(loop->preheader->lastPCode, pc1);
loop->bodySize--;
movedloopinvariantcode = 1;
}
loop->preheader = NULL;
insertpreheaderblock(loop);
pc3 = preheader->lastPCode;
CError_ASSERT(775, pc3->op == PC_B);
deletepcode(pc3);
deletepcode(pc2);
appendpcode(preheader, pc2);
movesuccessor(preheader, block, op->data.label.label->block);
}
static PCodeBlock *appendheadercopy(Loop *loop, PCodeBlock *block1, PCodeBlock *block2, PCodeBlock *block3) {
PCodeBlock *newblock1;
PCodeBlock *newblock2;
PCLink *link;
PCode *scan;
newblock1 = lalloc(sizeof(PCodeBlock));
newblock2 = lalloc(sizeof(PCodeBlock));
newblock1->labels = NULL;
newblock1->predecessors = newblock1->successors = NULL;
newblock1->firstPCode = newblock1->lastPCode = NULL;
newblock1->pcodeCount = 0;
newblock1->loopWeight = loop->body->loopWeight;
newblock1->flags = 0;
newblock1->blockIndex = pcblockcount++;
newblock2->labels = NULL;
newblock2->predecessors = newblock2->successors = NULL;
newblock2->firstPCode = newblock2->lastPCode = NULL;
newblock2->pcodeCount = 0;
newblock2->loopWeight = loop->body->loopWeight;
newblock2->flags = 0;
newblock2->blockIndex = pcblockcount++;
newblock1->nextBlock = newblock2;
newblock2->prevBlock = newblock1;
newblock1->prevBlock = block1;
newblock2->nextBlock = block1->nextBlock;
block1->nextBlock = newblock1;
newblock2->nextBlock->prevBlock = newblock2;
pclabel(newblock1, makepclabel());
pclabel(newblock2, makepclabel());
changesuccessor(block1, block1->successors->block, newblock1);
link = lalloc(sizeof(PCLink));
link->block = newblock2;
link->nextLink = newblock1->successors;
newblock1->successors = link;
link = lalloc(sizeof(PCLink));
link->block = newblock1;
link->nextLink = newblock2->predecessors;
newblock2->predecessors = link;
appendpcode(newblock2, makepcode(PC_B, block2->nextBlock->labels));
pcbranch(newblock2, block2->nextBlock->labels);
pccomputepredecessors1(newblock2);
for (scan = block2->firstPCode; scan; scan = scan->nextPCode)
appendpcode(newblock1, copypcode(scan));
pcbranch(newblock1, block3->labels);
link = lalloc(sizeof(PCLink));
link->block = newblock1;
link->nextLink = block3->predecessors;
block3->predecessors = link;
addblocktoloop(loop, newblock1);
if (bitvectorgetbit(block2->blockIndex, loop->vec28))
bitvectorsetbit(newblock1->blockIndex, loop->vec28);
if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
bitvectorsetbit(newblock1->blockIndex, loop->vec2C);
for (loop = loop->parent; loop; loop = loop->parent) {
addblocktoloop(loop, newblock1);
if (bitvectorgetbit(block2->blockIndex, loop->vec28))
bitvectorsetbit(newblock1->blockIndex, loop->vec28);
if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
bitvectorsetbit(newblock1->blockIndex, loop->vec2C);
addblocktoloop(loop, newblock2);
if (bitvectorgetbit(block2->blockIndex, loop->vec28))
bitvectorsetbit(newblock2->blockIndex, loop->vec28);
if (bitvectorgetbit(block2->blockIndex, loop->vec2C))
bitvectorsetbit(newblock2->blockIndex, loop->vec2C);
}
return newblock1;
}
static BlockList *findswitchpath(Loop *loop, PCodeBlock *block) {
BlockList *head;
BlockList *tail;
BlockList *node;
PCodeBlock *scan;
head = NULL;
tail = NULL;
for (scan = block; scan && scan != loop->body; scan = scan->successors->block) {
if (!bitvectorgetbit(scan->blockIndex, loop->memberblocks))
return NULL;
if (scan->successors && scan->successors->nextLink)
return NULL;
node = oalloc(sizeof(BlockList));
node->block = scan;
node->next = NULL;
if (head) {
tail->next = node;
tail = node;
} else {
head = node;
tail = node;
}
}
return head;
}
static void simpleunswitchloop(Loop *loop) {
PCode *pc29;
PCodeArg *op27;
UInt32 *myvec;
PCodeBlock *block26;
PCode *pc25; // r25
BlockList *path2_24;
PCodeArg *op23;
PCode *pc23; // r23
BlockList *scanlist; // r23
BlockList *bestpath1; // r23
BlockList *bestpath2; // r22
PCodeBlock *headercopy; // r22
Loop *newloop; // r21
PCodeBlock *preheader21;
BlockList *path20;
PCode *scan20;
PCode *lastpcode;
int i;
BlockList *pathiter1;
BlockList *pathiter2;
if (!(lastpcode = loop->body->lastPCode))
return;
if (lastpcode->op != PC_BT && lastpcode->op != PC_BF)
return;
if (lastpcode->args[2].kind != PCOp_LABEL)
return;
if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks))
return;
if (loop->x57)
return;
if (loop->x4D)
return;
if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks))
return;
for (block26 = pcbasicblocks; block26; block26 = block26->nextBlock) {
if (bitvectorgetbit(block26->blockIndex, loop->memberblocks))
break;
}
if (!block26)
return;
myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
bitvectorcopy(myvec, usedefinfo[block26->blockIndex].defvec8, number_of_Defs);
for (pc25 = loop->preheader->nextBlock->firstPCode; pc25; pc25 = pc25->nextPCode) {
if (!(PCODE_FLAG_SET_F(pc25) & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects | fRecordBit))) {
if (isloopinvariant(pc25, loop, myvec, 0, 1))
break;
}
}
if (!pc25 || pc25->argCount < 1)
return;
if (
pc25->argCount < 1 ||
pc25->args[0].kind != PCOp_REGISTER ||
pc25->args[0].arg != RegClass_CRFIELD
)
return;
pc29 = pc25->block->lastPCode;
if (
!pc29 ||
!(pc29->flags & fIsBranch) ||
pc29->args[0].kind != PCOp_REGISTER ||
pc29->args[0].arg != RegClass_CRFIELD
)
return;
if (pc29->args[0].data.reg.reg != pc25->args[0].data.reg.reg)
return;
op27 = NULL;
for (i = 0; i < pc29->argCount; i++) {
if (pc29->args[i].kind == PCOp_LABEL)
op27 = &pc29->args[i];
}
if (op27) {
preheader21 = loop->preheader;
path20 = findswitchpath(loop, block26->nextBlock);
if (!path20)
return;
path2_24 = findswitchpath(loop, op27->data.label.label->block);
if (!path2_24)
return;
bestpath1 = NULL;
bestpath2 = NULL;
for (pathiter1 = path20; pathiter1; pathiter1 = pathiter1->next) {
for (pathiter2 = path2_24; pathiter2; pathiter2 = pathiter2->next) {
if (pathiter1->block == pathiter2->block) {
bestpath1 = pathiter1;
break;
}
}
if (bestpath1)
break;
bestpath2 = pathiter1;
}
CError_ASSERT(1192, bestpath2->block);
if (bestpath2->block->lastPCode && bestpath2->block->lastPCode->op == PC_B)
deletepcode(bestpath2->block->lastPCode);
while (bestpath1) {
for (scan20 = bestpath1->block->firstPCode; scan20; scan20 = scan20->nextPCode) {
if (scan20->op != PC_B)
appendpcode(bestpath2->block, copypcode(scan20));
}
bestpath1 = bestpath1->next;
}
headercopy = appendheadercopy(loop, bestpath2->block, loop->body, block26);
movecmptopreheader(loop, block26, pc25, pc29, op27);
if (block26->pcodeCount) {
if (path2_24->block->firstPCode) {
pc23 = path2_24->block->firstPCode;
for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) {
if (scan20->op != PC_B)
insertpcodebefore(pc23, copypcode(scan20));
}
} else {
for (scan20 = block26->firstPCode; scan20; scan20 = scan20->nextPCode) {
if (scan20->op != PC_B)
appendpcode(path2_24->block, copypcode(scan20));
}
}
}
op23 = NULL;
for (i = 0; i < loop->body->lastPCode->argCount; i++) {
if (loop->body->lastPCode->args[i].kind == PCOp_LABEL)
op23 = &loop->body->lastPCode->args[i];
}
CError_ASSERT(1250, op23 != NULL);
changesuccessor(loop->body, op23->data.label.label->block, path2_24->block);
op23->data.label.label = path2_24->block->labels;
op23 = NULL;
for (i = 0; i < preheader21->lastPCode->argCount; i++) {
if (preheader21->lastPCode->args[i].kind == PCOp_LABEL)
op23 = &preheader21->lastPCode->args[i];
}
CError_ASSERT(1267, op23 != NULL);
changesuccessor(preheader21, op23->data.label.label->block, loop->body);
op23->data.label.label = loop->body->labels;
op23 = NULL;
for (i = 0; i < loop->preheader->lastPCode->argCount; i++) {
if (loop->preheader->lastPCode->args[i].kind == PCOp_LABEL)
op23 = &loop->preheader->lastPCode->args[i];
}
CError_ASSERT(1284, op23 != NULL);
changesuccessor(loop->preheader, op23->data.label.label->block, headercopy);
op23->data.label.label = headercopy->labels;
newloop = lalloc(sizeof(Loop));
newloop->parent = loop->parent;
newloop->children = NULL;
newloop->nextSibling = loop->nextSibling;
loop->nextSibling = newloop;
newloop->body = loop->body;
newloop->preheader = NULL;
newloop->blocks = NULL;
newloop->basicInductionVars = NULL;
newloop->footer = NULL;
newloop->pc18 = NULL;
newloop->loopWeight = loop->loopWeight;
bitvectorinitialize(newloop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
bitvectorinitialize(newloop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
bitvectorinitialize(newloop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
bitvectorinitialize(newloop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
removeblockfromloop(loop, newloop->body);
addblocktoloop(newloop, newloop->body);
bitvectorsetbit(newloop->body->blockIndex, newloop->vec24);
bitvectorsetbit(newloop->body->blockIndex, newloop->vec2C);
bitvectorsetbit(newloop->body->blockIndex, newloop->vec28);
for (scanlist = path2_24; scanlist; scanlist = scanlist->next) {
removeblockfromloop(loop, scanlist->block);
addblocktoloop(newloop, scanlist->block);
bitvectorsetbit(scanlist->block->blockIndex, newloop->vec2C);
}
newloop->preheader = NULL;
insertpreheaderblock(newloop);
analyzeloop(newloop);
loop->body = headercopy;
for (scanlist = loop->blocks; scanlist; scanlist = scanlist->next)
bitvectorsetbit(scanlist->block->blockIndex, loop->vec2C);
bitvectorsetbit(headercopy->blockIndex, loop->vec24);
analyzeloop(loop);
unswitchedinvariantcode = 1;
}
}
static void simpleunswitchloops(Loop *loop) {
while (loop) {
if (loop->children)
simpleunswitchloops(loop->children);
else if (!loop->x4F)
simpleunswitchloop(loop);
loop = loop->nextSibling;
}
}
static void moveinvariantsfromloop(Loop *loop) {
RegUseOrDef *list;
BlockList *blocklist;
PCode *instr;
PCode *nextInstr;
UInt32 *myvec;
UseOrDef *def;
int defID;
int flag;
PCodeBlock *block;
myvec = oalloc(4 * ((number_of_Defs + 31) >> 5));
do {
flag = 0;
for (blocklist = loop->blocks; blocklist; blocklist = blocklist->next) {
block = blocklist->block;
bitvectorcopy(myvec, usedefinfo[block->blockIndex].defvec8, number_of_Defs);
for (instr = block->firstPCode; instr; instr = nextInstr) {
nextInstr = instr->nextPCode;
if (!(instr->flags & fIsBranch) && instr->argCount) {
if (
!(instr->flags & (fIsCall | fIsPtrOp | fIsVolatile | fSideEffects)) &&
isloopinvariant(instr, loop, myvec, 0, 0) &&
maymove(instr, loop)
) {
moveinvariantcomputation(instr, loop);
flag = 1;
} else if (srawi_addze_isloopinvariant(instr, loop, myvec)) {
moveinvariantcomputation(instr, loop);
flag = 1;
}
for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) {
if (def->v.kind == PCOp_REGISTER) {
for (list = reg_Defs[def->v.arg][def->v.u.reg]; list; list = list->next)
bitvectorclearbit(list->id, myvec);
} else if (def->v.kind == PCOp_MEMORY) {
if (def->v.arg == PCOpMemory0) {
for (list = findobjectusedef(def->v.u.object)->defs; list; list = list->next) {
if (uniquely_aliases(instr, Defs[list->id].pcode))
bitvectorclearbit(list->id, myvec);
}
}
} else {
CError_FATAL(1434);
}
bitvectorsetbit(defID, myvec);
}
}
}
}
} while (flag);
}
static void moveinvariantsfromloops(Loop *loop) {
while (loop) {
if (loop->children)
moveinvariantsfromloops(loop->children);
moveinvariantsfromloop(loop);
loop = loop->nextSibling;
}
}
void moveloopinvariantcode(void) {
unswitchedinvariantcode = 0;
movedloopinvariantcode = 0;
if (loopsinflowgraph) {
moveinvariantsfromloops(loopsinflowgraph);
simpleunswitchloops(loopsinflowgraph);
}
freeoheap();
}

View File

@@ -0,0 +1,643 @@
#include "compiler/ConstantPropagation.h"
#include "compiler/Alias.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/RegisterInfo.h"
#include "compiler/StackFrame.h"
#include "compiler/UseDefChains.h"
#include "compiler/objects.h"
int propagatedconstants;
static int changed;
static PCode **defininginstruction;
static PCode **vrdefininginstruction;
static void computedefininginstructions(PCodeBlock *block) {
RegUseOrDef *list;
PCode *instr;
int i;
for (i = 0; i < used_virtual_registers[RegClass_GPR]; i++) {
instr = NULL;
for (list = reg_Defs[RegClass_GPR][i]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) {
if (instr == NULL) {
instr = Defs[list->id].pcode;
} else {
instr = NULL;
break;
}
}
}
defininginstruction[i] = instr;
}
for (i = 0; i < used_virtual_registers[RegClass_VR]; i++) {
instr = NULL;
for (list = reg_Defs[RegClass_VR][i]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[block->blockIndex].defvec8)) {
if (instr == NULL) {
instr = Defs[list->id].pcode;
} else {
instr = NULL;
break;
}
}
}
vrdefininginstruction[i] = instr;
}
}
static PCode *isstackoperand(PCodeArg *op, SInt16 *resultValue, SInt16 addend) {
PCode *instr;
if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_ADDI) {
if (
instr->args[2].kind == PCOp_MEMORY &&
(instr->args[1].data.reg.reg == _FP_ || instr->args[1].data.reg.reg == _CALLER_SP_) &&
instr->args[2].data.mem.obj->datatype == DLOCAL
)
{
if (can_add_displ_to_local(instr->args[2].data.mem.obj, addend)) {
*resultValue = instr->args[2].data.mem.offset;
return instr;
} else {
return NULL;
}
} else {
return NULL;
}
} else {
return NULL;
}
}
static int isconstantoperand(PCodeArg *op, SInt16 *resultValue) {
PCode *instr;
if (
(instr = defininginstruction[op->data.reg.reg]) &&
instr->op == PC_LI &&
instr->args[1].kind == PCOp_IMMEDIATE
)
{
*resultValue = instr->args[1].data.imm.value;
return 1;
} else {
return 0;
}
}
static int isuint16constantoperand(PCodeArg *op, SInt16 *resultValue) {
PCode *instr;
if (
(instr = defininginstruction[op->data.reg.reg]) &&
instr->op == PC_LI &&
instr->args[1].kind == PCOp_IMMEDIATE &&
FITS_IN_USHORT(instr->args[1].data.imm.value)
)
{
*resultValue = instr->args[1].data.imm.value;
return 1;
} else {
return 0;
}
}
static int isvectorconstantoperand(PCodeArg *op, SInt16 *resultValue, Opcode *resultNewOp) {
PCode *instr;
if (
(instr = vrdefininginstruction[op->data.reg.reg]) &&
(instr->op == PC_VSPLTISB || instr->op == PC_VSPLTISH || instr->op == PC_VSPLTISW) &&
instr->args[1].kind == PCOp_IMMEDIATE
)
{
*resultValue = instr->args[1].data.imm.value;
*resultNewOp = instr->op;
return 1;
} else {
return 0;
}
}
static int isunsignedloadoperand(PCodeArg *op) {
PCode *instr;
if ((instr = defininginstruction[op->data.reg.reg])) {
if (instr->flags & fIsRead) {
if (instr->op >= PC_LHZ && instr->op <= PC_LHZUX)
return 2;
if (instr->op >= PC_LBZ && instr->op <= PC_LBZUX)
return 1;
} else if (instr->op == PC_RLWINM) {
int var3 = instr->args[3].data.imm.value;
int var4 = instr->args[4].data.imm.value;
if (var4 == 31) {
if (var3 == 24)
return 1;
if (var3 == 16)
return 2;
}
}
}
return 0;
}
static int ismaskedoperand(PCodeArg *op, UInt32 *resultMask) {
PCode *instr;
UInt32 mask;
if ((instr = defininginstruction[op->data.reg.reg]) && instr->op == PC_RLWINM) {
if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) {
mask =
((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) &
~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
} else {
mask =
((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) |
~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
}
*resultMask = mask;
return 1;
}
return 0;
}
static int issignedloadoperand(PCodeArg *op) {
PCode *instr;
if ((instr = defininginstruction[op->data.reg.reg])) {
if (instr->flags & fIsRead) {
if (instr->op >= PC_LHA && instr->op <= PC_LHAUX)
return 2;
} else if (instr->op == PC_EXTSB) {
return 1;
} else if (instr->op == PC_EXTSH) {
return 2;
}
}
return 0;
}
static void propagateconstantstoblock(PCodeBlock *block) {
PCode *instr;
SInt16 immAddend;
SInt16 value1;
SInt16 valueU16;
Opcode newOpcode;
SInt16 value2;
UInt32 mask;
UInt32 mask2;
int loadSize;
PCodeArg *op;
int i;
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
switch (instr->op) {
case PC_MR:
if (isconstantoperand(&instr->args[1], &value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
}
break;
case PC_VMR:
if (isvectorconstantoperand(&instr->args[1], &value1, &newOpcode)) {
change_opcode(instr, newOpcode);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
}
break;
case PC_RLWINM:
if (
!(PCODE_FLAG_SET_F(instr) & fRecordBit) &&
instr->args[2].data.imm.value == 0 &&
instr->args[4].data.imm.value == 31
)
{
if (isconstantoperand(&instr->args[1], &value1)) {
if (
(instr->args[3].data.imm.value == 16 && value1 == (value1 & 0x7FFF)) ||
(instr->args[3].data.imm.value == 24 && value1 == (value1 & 0xFF))
)
{
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
}
loadSize = isunsignedloadoperand(&instr->args[1]);
if (
(loadSize == 2 && instr->args[3].data.imm.value <= 16) ||
(loadSize == 1 && instr->args[3].data.imm.value <= 24)
)
{
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
if (ismaskedoperand(&instr->args[1], &mask)) {
if (instr->args[3].data.imm.value <= instr->args[4].data.imm.value) {
mask2 =
((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) &
~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
} else {
mask2 =
((instr->args[3].data.imm.value > 31) ? 0 : (0xFFFFFFFFU >> instr->args[3].data.imm.value)) |
~(((instr->args[4].data.imm.value + 1) > 31) ? 0 : (0xFFFFFFFFU >> (instr->args[4].data.imm.value + 1)));
}
if (mask == (mask & mask2)) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
}
}
break;
case PC_EXTSH:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (isconstantoperand(&instr->args[1], &value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
loadSize = issignedloadoperand(&instr->args[1]);
if (loadSize == 1 || loadSize == 2) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
break;
case PC_EXTSB:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (
isconstantoperand(&instr->args[1], &value1) &&
value1 >= -128 &&
value1 <= 127
)
{
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
break;
}
loadSize = issignedloadoperand(&instr->args[1]);
if (loadSize == 1) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
break;
case PC_ADDI:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
immAddend = instr->args[2].data.imm.value;
if (
isconstantoperand(&instr->args[1], &value1) &&
FITS_IN_SHORT(immAddend + value1)
)
{
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = immAddend + value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
break;
case PC_ADD:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (isconstantoperand(&instr->args[2], &value1)) {
if (value1 == 0) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
} else {
change_opcode(instr, PC_ADDI);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
}
propagatedconstants = 1;
changed = 1;
immAddend = value1;
}
if (isconstantoperand(&instr->args[1], &value1)) {
if (instr->op == PC_ADDI || instr->op == PC_MR) {
if (FITS_IN_SHORT(immAddend + value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = immAddend + value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
} else {
instr->args[1] = instr->args[2];
if (value1 == 0) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
} else {
change_opcode(instr, PC_ADDI);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
}
propagatedconstants = 1;
changed = 1;
}
}
if (changed) {
if (instr->op == PC_MR) {
PCode *stackInstr;
if ((stackInstr = isstackoperand(&instr->args[1], &value1, 0))) {
change_opcode(instr, PC_ADDI);
instr->flags = stackInstr->flags;
instr->args[1] = stackInstr->args[1];
instr->args[2] = stackInstr->args[2];
change_num_operands(instr, 3);
propagatedconstants = 1;
changed = 1;
}
} else if (instr->op == PC_ADDI && instr->args[2].kind == PCOp_IMMEDIATE) {
PCode *stackInstr;
SInt16 addend = instr->args[2].data.imm.value;
if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) {
change_opcode(instr, PC_ADDI);
instr->flags = stackInstr->flags;
instr->args[1] = stackInstr->args[1];
instr->args[2] = stackInstr->args[2];
instr->args[2].data.imm.value = value1 + addend;
if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000))
instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value, 1);
propagatedconstants = 1;
changed = 1;
}
}
}
break;
case PC_OR:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
value1 = 0;
immAddend = 0;
if (isconstantoperand(&instr->args[2], &value1)) {
if (isuint16constantoperand(&instr->args[2], &valueU16)) {
if (valueU16 != 0) {
change_opcode(instr, PC_ORI);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = valueU16;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
} else {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
value1 = valueU16;
} else if (value1 == 0) {
change_opcode(instr, PC_MR);
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
immAddend = value1;
}
if (isconstantoperand(&instr->args[1], &value1)) {
if (instr->op == PC_ORI || instr->op == PC_MR) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = immAddend | value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
} else if (isuint16constantoperand(&instr->args[1], &valueU16)) {
if (valueU16 != 0) {
change_opcode(instr, PC_ORI);
instr->args[1] = instr->args[2];
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = valueU16;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
} else {
change_opcode(instr, PC_MR);
instr->args[1] = instr->args[2];
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
} else if (value1 == 0) {
change_opcode(instr, PC_MR);
instr->args[1] = instr->args[2];
change_num_operands(instr, 2);
propagatedconstants = 1;
changed = 1;
}
}
break;
case PC_SUBF:
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
break;
if (isconstantoperand(&instr->args[1], &value1) && FITS_IN_SHORT(-value1)) {
if (isconstantoperand(&instr->args[2], &value2) && FITS_IN_SHORT(value2 - value1)) {
change_opcode(instr, PC_LI);
instr->args[1].kind = PCOp_IMMEDIATE;
instr->args[1].data.imm.value = value2 - value1;
instr->args[1].data.imm.obj = NULL;
change_num_operands(instr, 2);
} else if (value1 == 0) {
change_opcode(instr, PC_MR);
instr->args[1] = instr->args[2];
change_num_operands(instr, 2);
} else {
change_opcode(instr, PC_ADDI);
instr->args[1] = instr->args[2];
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = -value1;
instr->args[2].data.imm.obj = NULL;
}
propagatedconstants = 1;
changed = 1;
value2 = value1;
} else if (isconstantoperand(&instr->args[2], &value1) && FITS_IN_SHORT(-value1)) {
if (value1 == 0) {
change_opcode(instr, PC_NEG);
change_num_operands(instr, 2);
} else {
instr->flags = opcodeinfo[PC_SUBFIC].flags | (instr->flags & ~opcodeinfo[PC_SUBF].flags);
change_opcode(instr, PC_SUBFIC);
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
instr->args[3].kind = PCOp_REGISTER;
instr->args[3].arg = RegClass_SPR;
instr->args[3].data.reg.reg = 0;
instr->args[3].data.reg.effect = EffectWrite;
change_num_operands(instr, 4);
}
propagatedconstants = 1;
changed = 1;
}
break;
case PC_LBZ:
case PC_LHZ:
case PC_LHA:
case PC_LWZ:
case PC_STB:
case PC_STH:
case PC_STW:
case PC_LFS:
case PC_LFD:
case PC_STFS:
case PC_STFD:
if (instr->args[2].kind == PCOp_IMMEDIATE) {
PCode *stackInstr;
SInt16 addend = instr->args[2].data.imm.value;
if ((stackInstr = isstackoperand(&instr->args[1], &value1, addend))) {
instr->args[1] = stackInstr->args[1];
instr->args[2] = stackInstr->args[2];
instr->args[2].data.imm.value = value1 + addend;
if (instr->flags & (fIsRead | fIsWrite | fPCodeFlag20000 | fPCodeFlag40000))
instr->alias = make_alias(instr->args[2].data.imm.obj, instr->args[2].data.imm.value,
nbytes_loaded_or_stored_by(instr));
propagatedconstants = 1;
changed = 1;
}
}
break;
case PC_LBZX:
case PC_LHZX:
case PC_LHAX:
case PC_LWZX:
case PC_STBX:
case PC_STHX:
case PC_STWX:
case PC_LFSX:
case PC_LFDX:
case PC_STFSX:
case PC_STFDX:
if (isconstantoperand(&instr->args[2], &value1)) {
instr->op -= 2;
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
} else if (isconstantoperand(&instr->args[1], &value1)) {
instr->op -= 2;
instr->args[1] = instr->args[2];
instr->args[2].kind = PCOp_IMMEDIATE;
instr->args[2].data.imm.value = value1;
instr->args[2].data.imm.obj = NULL;
propagatedconstants = 1;
changed = 1;
}
break;
}
for (i = 0, op = instr->args; i < instr->argCount; i++, op++) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite)
)
{
defininginstruction[op->data.reg.reg] = instr;
}
else if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_VR &&
(op->data.reg.effect & EffectWrite)
)
{
vrdefininginstruction[op->data.reg.reg] = instr;
}
}
}
}
void propagateconstants(void) {
PCodeBlock *block;
int i;
propagatedconstants = 0;
computeusedefchains(0);
defininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_GPR]);
vrdefininginstruction = galloc(sizeof(PCode *) * used_virtual_registers[RegClass_VR]);
do {
changed = 0;
for (i = 0; i < pcblockcount; i++) {
if ((block = depthfirstordering[i])) {
computedefininginstructions(block);
propagateconstantstoblock(block);
}
}
} while (changed);
freeoheap();
}

View File

@@ -0,0 +1,885 @@
#include "compiler/LoopDetection.h"
#include "compiler/CFunc.h"
#include "compiler/PCode.h"
#include "compiler/TOC.h"
#include "compiler/UseDefChains.h"
#include "compiler/CompilerTools.h"
#include "compiler/BitVectors.h"
#include "compiler/enode.h"
#include "compiler/objects.h"
Loop *loopsinflowgraph;
int loopdetection_nblocks;
static UInt32 **dominators;
static BlockList *loopheaders;
static int nloopheaders;
static PCodeBlock **loopstack;
BitVector *LoopTemp;
struct LoopList *LoopList_First;
static void computedominators(void) {
int i;
PCodeBlock *block;
int blockCount;
int flag;
UInt32 *myvec;
PCLink *link;
blockCount = pcblockcount;
flag = 1;
dominators = oalloc(sizeof(UInt32 *) * pcblockcount);
for (i = 0; i < pcblockcount; i++)
dominators[i] = oalloc(4 * ((blockCount + 31) >> 5));
myvec = oalloc(4 * ((blockCount + 31) >> 5));
bitvectorinitialize(dominators[pcbasicblocks->blockIndex], blockCount, 0);
//dominators[pcbasicblocks->blockIndex][0] |= 1;
bitvectorsetbit(0, dominators[pcbasicblocks->blockIndex]);
for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock)
bitvectorinitialize(dominators[block->blockIndex], blockCount, 0xFFFFFFFF);
computedepthfirstordering();
while (flag) {
flag = 0;
for (i = 0; i < pcblockcount; i++) {
block = depthfirstordering[i];
if (block && block->blockIndex != pcbasicblocks->blockIndex) {
bitvectorcopy(myvec, dominators[block->predecessors->block->blockIndex], blockCount);
for (link = block->predecessors->nextLink; link; link = link->nextLink)
bitvectorintersect(myvec, dominators[link->block->blockIndex], blockCount);
//myvec[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31);
bitvectorsetbit(block->blockIndex, myvec);
if (bitvectorchanged(dominators[block->blockIndex], myvec, blockCount))
flag = 1;
}
}
}
}
static BlockList *findloopheaders(void) {
PCodeBlock *block;
PCLink *link;
BlockList *list;
loopheaders = NULL;
nloopheaders = 0;
for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
for (link = block->predecessors; link; link = link->nextLink) {
//if ((1 << (block->blockIndex & 31)) & dominators[link->block->blockIndex][block->blockIndex >> 5])
if (bitvectorgetbit(block->blockIndex, dominators[link->block->blockIndex]))
break;
}
if (link) {
list = oalloc(sizeof(BlockList));
list->block = block;
list->next = loopheaders;
loopheaders = list;
nloopheaders++;
}
}
return loopheaders;
}
void addblocktoloop(Loop *loop, PCodeBlock *block) {
BlockList *list = lalloc(sizeof(BlockList));
//loop->memberblocks[block->blockIndex >> 5] |= 1 << (block->blockIndex & 31);
bitvectorsetbit(block->blockIndex, loop->memberblocks);
list->block = block;
list->next = loop->blocks;
loop->blocks = list;
}
static void findnaturalloop(Loop *loop) {
BlockList *list;
BlockList *list2;
PCLink *link;
PCodeBlock *block;
int i;
i = 0;
addblocktoloop(loop, loop->body);
for (link = loop->body->predecessors; link; link = link->nextLink) {
if (bitvectorgetbit(loop->body->blockIndex, dominators[link->block->blockIndex]) && link->block != loop->body) {
addblocktoloop(loop, link->block);
loopstack[i++] = link->block;
}
}
while (i) {
link = loopstack[--i]->predecessors;
while (link) {
if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
addblocktoloop(loop, link->block);
loopstack[i++] = link->block;
}
link = link->nextLink;
}
}
for (list = loop->blocks; list; list = list->next) {
block = list->block;
for (link = block->successors; link; link = link->nextLink) {
if (!bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
bitvectorsetbit(block->blockIndex, loop->vec24);
break;
}
}
}
for (list = loop->blocks; list; list = list->next) {
for (list2 = loop->blocks; list2; list2 = list2->next) {
if (bitvectorgetbit(list2->block->blockIndex, loop->vec24) &&
!bitvectorgetbit(list->block->blockIndex, dominators[list2->block->blockIndex]))
break;
}
if (!list2)
bitvectorsetbit(list->block->blockIndex, loop->vec28);
}
for (list = loop->blocks; list; list = list->next) {
for (link = loop->body->predecessors; link; link = link->nextLink) {
if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks) &&
!bitvectorgetbit(list->block->blockIndex, dominators[link->block->blockIndex]))
break;
}
if (!link)
bitvectorsetbit(list->block->blockIndex, loop->vec2C);
}
}
static void addlooptolist(Loop *loop, Loop **list) {
Loop **scan;
Loop *scanloop;
scan = list;
while ((scanloop = *scan)) {
if (bitvectorgetbit(loop->body->blockIndex, scanloop->memberblocks)) {
loop->parent = scanloop;
addlooptolist(loop, &scanloop->children);
return;
}
if (bitvectorgetbit(scanloop->body->blockIndex, loop->memberblocks)) {
*scan = scanloop->nextSibling;
scanloop->parent = loop;
scanloop->nextSibling = loop->children;
loop->children = scanloop;
} else {
scan = &scanloop->nextSibling;
}
}
loop->nextSibling = *list;
*list = loop;
}
static void findnaturalloops(void) {
Loop *loop;
int size;
loopdetection_nblocks = pcblockcount + 5 * nloopheaders;
loopstack = oalloc(sizeof(PCodeBlock *) * pcblockcount);
while (loopheaders) {
loop = lalloc(sizeof(Loop));
loop->parent = loop->nextSibling = loop->children = NULL;
loop->body = loopheaders->block;
loop->preheader = NULL;
loop->blocks = NULL;
loop->basicInductionVars = NULL;
loop->footer = NULL;
loop->pc18 = NULL;
loop->loopWeight = loop->body->loopWeight;
bitvectorinitialize(loop->memberblocks = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
bitvectorinitialize(loop->vec24 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
bitvectorinitialize(loop->vec28 = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
bitvectorinitialize(loop->vec2C = lalloc(4 * ((loopdetection_nblocks + 31) >> 5)), loopdetection_nblocks, 0);
findnaturalloop(loop);
addlooptolist(loop, &loopsinflowgraph);
loopheaders = loopheaders->next;
}
}
static PCodeBlock *makepreheaderblock(void) {
PCodeLabel *label;
PCodeBlock *block;
label = makepclabel();
block = lalloc(sizeof(PCodeBlock));
block->nextBlock = NULL;
block->prevBlock = NULL;
block->labels = NULL;
block->successors = NULL;
block->predecessors = NULL;
block->firstPCode = block->lastPCode = NULL;
block->pcodeCount = 0;
block->flags = 0;
block->blockIndex = pcblockcount++;
pclabel(block, label);
return block;
}
static void insertpreheaderbefore(PCodeBlock *a, PCodeBlock *b) {
a->nextBlock = b;
a->prevBlock = b->prevBlock;
b->prevBlock->nextBlock = a;
b->prevBlock = a;
}
void insertpreheaderblock(Loop *loop) {
PCodeBlock *preheader;
PCodeBlock *block29;
PCodeBlock *block28;
PCode *pcode27;
PCLink *link; // r26
PCLink **linkptr; // r25
PCodeLabel *newlabel; // r23
PCLink *innerlink;
PCodeBlock *block;
PCodeArg *arg;
int i;
preheader = loop->preheader = makepreheaderblock();
block29 = NULL;
block28 = loop->body;
if (!block28->labels)
pclabel(block28, makepclabel());
appendpcode(preheader, makepcode(PC_B, block28->labels));
preheader->loopWeight = loop->parent ? loop->parent->loopWeight : 1;
linkptr = &block28->predecessors;
while ((link = *linkptr)) {
if (bitvectorgetbit(link->block->blockIndex, loop->memberblocks)) {
linkptr = &link->nextLink;
} else {
if (link->block->pcodeCount) {
pcode27 = link->block->lastPCode;
if (pcode27->op == PC_B) {
CError_ASSERT(462, pcode27->args[0].kind == PCOp_LABEL);
if (pcode27->args[0].data.label.label->block == block28)
pcode27->args[0].data.label.label = preheader->labels;
} else if (pcode27->op == PC_BT || pcode27->op == PC_BF) {
CError_ASSERT(474, pcode27->args[2].kind == PCOp_LABEL);
if (pcode27->args[2].data.label.label->block == block28)
pcode27->args[2].data.label.label = preheader->labels;
} else if (pcode27->op == PC_BCTR) {
if (pcode27->argCount > 1 && pcode27->args[1].kind == PCOp_MEMORY) {
Object *obj = pcode27->args[1].data.mem.obj;
UInt32 *array = (UInt32 *) obj->u.data.u.switchtable.data;
int i;
for (i = 0; i < obj->u.data.u.switchtable.size; i++) {
if (((PCodeLabel *) CTool_ResolveIndexToPointer(array[i]))->block == block28)
array[i] = CTool_CreateIndexFromPointer(preheader->labels);
}
} else {
CodeLabelList *cll;
for (cll = codelabellist; cll; cll = cll->next) {
if (cll->label->pclabel->block == block28)
cll->label->pclabel = preheader->labels;
}
}
} else {
CError_ASSERT(505, link->block->nextBlock == block28);
}
}
for (innerlink = link->block->successors; innerlink; innerlink = innerlink->nextLink) {
if (innerlink->block == block28)
innerlink->block = preheader;
}
*linkptr = link->nextLink;;
link->nextLink = preheader->predecessors;
preheader->predecessors = link;
}
}
if (!bitvectorgetbit(block28->prevBlock->blockIndex, loop->memberblocks)) {
insertpreheaderbefore(preheader, block28);
if (
(!block28->nextBlock || !bitvectorgetbit(block28->nextBlock->blockIndex, loop->memberblocks)) &&
block28->lastPCode &&
(block28->lastPCode->flags & fIsBranch) &&
block28->lastPCode->op != PC_BDNZ
) {
i = block28->lastPCode->argCount;
arg = block28->lastPCode->args;
while (i && arg->kind != PCOp_LABEL) {
arg++;
i--;
}
if (i && arg->kind == PCOp_LABEL && arg->data.label.label->block == block28) {
block29 = makepreheaderblock();
insertpreheaderbefore(block29, block28);
newlabel = makepclabel();
pclabel(block29, newlabel);
arg->data.label.label = newlabel;
link = lalloc(sizeof(PCLink));
link->block = block28;
link->nextLink = block29->predecessors;
block29->predecessors = link;
link = lalloc(sizeof(PCLink));
link->block = block28;
link->nextLink = block29->successors;
block29->successors = link;
for (link = block28->successors; link; link = link->nextLink) {
if (link->block == block28)
link->block = block29;
}
for (link = block28->predecessors; link; link = link->nextLink) {
if (link->block == block28)
link->block = block29;
}
bitvectorsetbit(block29->blockIndex, loop->vec2C);
addblocktoloop(loop, block29);
}
}
} else {
for (block = pcbasicblocks; block; block = block->nextBlock) {
if (bitvectorgetbit(block->blockIndex, loop->memberblocks))
break;
}
insertpreheaderbefore(preheader, block);
}
link = lalloc(sizeof(PCLink));
link->block = preheader;
link->nextLink = block28->predecessors;
block28->predecessors = link;
link = lalloc(sizeof(PCLink));
link->block = block28;
link->nextLink = preheader->successors;
preheader->successors = link;
for (loop = loop->parent; loop; loop = loop->parent) {
addblocktoloop(loop, preheader);
if (bitvectorgetbit(block28->blockIndex, loop->vec28)) {
bitvectorsetbit(preheader->blockIndex, loop->vec28);
if (block29)
bitvectorsetbit(block29->blockIndex, loop->vec28);
}
if (bitvectorgetbit(block28->blockIndex, loop->vec2C)) {
bitvectorsetbit(preheader->blockIndex, loop->vec2C);
if (block29)
bitvectorsetbit(block29->blockIndex, loop->vec2C);
}
}
}
static void insertpreheaderblocks(Loop *loop) {
while (loop) {
if (loop->children)
insertpreheaderblocks(loop->children);
insertpreheaderblock(loop);
loop = loop->nextSibling;
}
}
void findloopsinflowgraph(void) {
loopsinflowgraph = NULL;
computedominators();
if (findloopheaders()) {
findnaturalloops();
insertpreheaderblocks(loopsinflowgraph);
}
freeoheap();
}
static int checklooplimits(SInt32 opcode, SInt32 condition, SInt32 c, SInt32 d, SInt32 addend, SInt32 *result) {
if (opcode == PC_BT) {
if (condition == 0) {
if (addend <= 0)
return 0;
if (c < d)
*result = (d - c + addend - 1) / addend;
else
*result = 0;
} else if (condition == 1) {
if (addend >= 0)
return 0;
if (c > d)
*result = (c - d - addend - 1) / -addend;
else
*result = 0;
} else {
return 0;
}
} else {
if (condition == 0) {
if (addend >= 0)
return 0;
if (c >= d)
*result = (c - d - addend) / -addend;
else
*result = 0;
} else if (condition == 1) {
if (addend <= 0)
return 0;
if (c <= d)
*result = (d - c + addend) / addend;
else
*result = 0;
} else if (c < d) {
if (addend <= 0)
return 0;
if ((d - c) % addend)
return 0;
*result = (d - c) / addend;
} else if (c > d) {
if (addend >= 0)
return 0;
if ((c - d) % -addend)
return 0;
*result = (c - d) / -addend;
} else {
*result = 0;
}
}
return 1;
}
static int checkunsignedlooplimits(SInt32 opcode, SInt32 condition, UInt32 c, UInt32 d, SInt32 addend, UInt32 *result) {
if (opcode == PC_BT) {
if (condition == 0) {
if (addend <= 0)
return 0;
if (c < d)
*result = (d - c + addend - 1) / addend;
else
*result = 0;
} else if (condition == 1) {
if (addend >= 0)
return 0;
if (c > d)
*result = (c - d - addend - 1) / -addend;
else
*result = 0;
} else {
return 0;
}
} else {
if (condition == 0) {
if (addend >= 0)
return 0;
if (c >= d)
*result = (c - d - addend) / -addend;
else
*result = 0;
} else if (condition == 1) {
if (addend <= 0)
return 0;
if (c <= d)
*result = (d - c + addend) / addend;
else
*result = 0;
} else if (c < d) {
if (addend <= 0)
return 0;
if ((d - c) % addend)
return 0;
*result = (d - c) / addend;
} else if (c > d) {
if (addend >= 0)
return 0;
if ((c - d) % -addend)
return 0;
*result = (c - d) / -addend;
} else {
*result = 0;
}
}
return (*result & 0x80000000) == 0;
}
static int checkunknownloop(int a, int b, int c, unsigned char *op) {
if (a == PC_BT) {
if (b == 0) {
if (c <= 0)
return 0;
*op = ELESS;
} else if (b == 1) {
if (c >= 0)
return 0;
*op = EGREATER;
} else {
return 0;
}
} else {
if (b == 0) {
if (c >= 0)
return 0;
*op = EGREATEREQU;
} else if (b == 1) {
if (c <= 0)
return 0;
*op = ELESSEQU;
} else if (c == 1) {
*op = ENOTEQU;
} else if (c == -1) {
*op = ENOTEQU;
} else {
return 0;
}
}
return 1;
}
static void checkcountingloop(Loop *loop) {
RegUseOrDef *list;
PCode *lastpcode;
PCode *prevpcode;
PCode *pc8;
PCode *check;
short op12;
short reg11;
SInt16 reg4;
short reg11b;
Loop *child;
if (!(lastpcode = loop->body->lastPCode))
return;
if (lastpcode->op != PC_BT && lastpcode->op != PC_BF)
return;
if (lastpcode->args[2].kind != PCOp_LABEL)
return;
if (!bitvectorgetbit(lastpcode->args[2].data.label.label->block->blockIndex, loop->memberblocks))
return;
if (bitvectorgetbit(loop->body->nextBlock->blockIndex, loop->memberblocks))
return;
reg11 = lastpcode->args[0].data.reg.reg;
reg4 = lastpcode->args[1].data.imm.value;
prevpcode = lastpcode->prevPCode;
if (!prevpcode)
return;
op12 = prevpcode->op;
if (op12 == PC_ADDI && prevpcode->args[2].kind == PCOp_IMMEDIATE) {
pc8 = prevpcode;
prevpcode = prevpcode->prevPCode;
if (!prevpcode)
return;
op12 = prevpcode->op;
if (pc8->args[0].data.reg.reg != pc8->args[1].data.reg.reg)
return;
if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI)
return;
if (prevpcode->args[1].data.reg.reg != pc8->args[0].data.reg.reg)
return;
if ((loop->step = pc8->args[2].data.imm.value) == 0)
return;
}
if (op12 != PC_CMP && op12 != PC_CMPL && op12 != PC_CMPI && op12 != PC_CMPLI)
return;
if (prevpcode->args[0].data.reg.reg != reg11)
return;
reg11b = prevpcode->args[1].data.reg.reg;
if (reg11b < 32)
return;
if (loop->preheader->nextBlock != lastpcode->args[2].data.label.label->block)
return;
if (op12 == PC_CMPI) {
if (prevpcode->prevPCode)
return;
loop->upper = prevpcode->args[2].data.imm.value;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (op12 == PC_CMPLI) {
if (prevpcode->prevPCode)
return;
loop->upper = prevpcode->args[2].data.imm.value & 0xFFFF;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (op12 == PC_CMP || op12 == PC_CMPL) {
if (prevpcode->prevPCode) {
if (
prevpcode->prevPCode->op == PC_LI &&
prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
!prevpcode->prevPCode->prevPCode
) {
loop->upper = prevpcode->prevPCode->args[1].data.imm.value;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
prevpcode->prevPCode->op == PC_LIS &&
prevpcode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
!prevpcode->prevPCode->prevPCode
) {
loop->upper = prevpcode->prevPCode->args[1].data.imm.value << 16;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
prevpcode->prevPCode->op == PC_ADDI &&
prevpcode->prevPCode->args[2].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
prevpcode->prevPCode->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg &&
prevpcode->prevPCode->prevPCode &&
prevpcode->prevPCode->prevPCode->op == PC_LIS &&
prevpcode->prevPCode->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
prevpcode->prevPCode->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg &&
!prevpcode->prevPCode->prevPCode->prevPCode
) {
loop->upper = prevpcode->prevPCode->args[2].data.imm.value +
(prevpcode->prevPCode->prevPCode->args[1].data.imm.value << 16);
loop->upperType = LOOP_BOUND_CONSTANT;
} else {
return;
}
} else {
pc8 = NULL;
for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks))
return;
}
for (list = reg_Defs[RegClass_GPR][prevpcode->args[2].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) {
if (!pc8) {
pc8 = Defs[list->id].pcode;
if (
pc8->op == PC_LI &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->upper = pc8->args[1].data.imm.value;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_LIS &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->upper = pc8->args[1].data.imm.value << 16;
loop->upperType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_ADDI &&
pc8->args[2].kind == PCOp_IMMEDIATE &&
pc8->args[1].data.reg.reg == prevpcode->args[2].data.reg.reg &&
pc8->prevPCode &&
pc8->prevPCode->op == PC_LIS &&
pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
pc8->prevPCode->args[0].data.reg.reg == prevpcode->args[2].data.reg.reg
) {
loop->upper = pc8->args[2].data.imm.value +
(pc8->prevPCode->args[1].data.imm.value << 16);
loop->upperType = LOOP_BOUND_CONSTANT;
} else {
loop->upperType = LOOP_BOUND_VARIABLE;
break;
}
} else {
loop->upperType = LOOP_BOUND_VARIABLE;
break;
}
}
}
if (loop->upperType == LOOP_BOUND_INDETERMINATE)
loop->upperType = LOOP_BOUND_VARIABLE;
}
}
pc8 = NULL;
for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) {
check = Defs[list->id].pcode;
if (bitvectorgetbit(check->block->blockIndex, loop->memberblocks)) {
if (!pc8) {
pc8 = check;
if (check->op != PC_ADDI)
return;
if (check->args[1].data.reg.reg != reg11b)
return;
if (check->args[2].kind != PCOp_IMMEDIATE)
return;
if ((loop->step = check->args[2].data.imm.value) == 0)
return;
} else {
return;
}
}
}
if (!pc8)
return;
if (pc8->block != prevpcode->block && !bitvectorgetbit(prevpcode->block->blockIndex, loop->vec2C))
return;
if (loop->children) {
for (child = loop->children; child; child = child->nextSibling) {
if (bitvectorgetbit(pc8->block->blockIndex, child->memberblocks))
return;
}
}
loop->pc18 = pc8;
pc8 = NULL;
for (list = reg_Defs[RegClass_GPR][reg11b]; list; list = list->next) {
if (bitvectorgetbit(list->id, usedefinfo[loop->preheader->blockIndex].defvec8)) {
if (!pc8) {
pc8 = Defs[list->id].pcode;
if (
pc8->op == PC_LI &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->lower = pc8->args[1].data.imm.value;
loop->lowerType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_LIS &&
pc8->args[1].kind == PCOp_IMMEDIATE
) {
loop->lower = pc8->args[1].data.imm.value << 16;
loop->lowerType = LOOP_BOUND_CONSTANT;
} else if (
pc8->op == PC_ADDI &&
pc8->args[2].kind == PCOp_IMMEDIATE &&
pc8->args[1].data.reg.reg == reg11b &&
pc8->prevPCode &&
pc8->prevPCode->op == PC_LIS &&
pc8->prevPCode->args[1].kind == PCOp_IMMEDIATE &&
pc8->prevPCode->args[0].data.reg.reg == reg11b
) {
loop->lower = pc8->args[2].data.imm.value +
(pc8->prevPCode->args[1].data.imm.value << 16);
loop->lowerType = LOOP_BOUND_CONSTANT;
} else {
loop->lowerType = LOOP_BOUND_VARIABLE;
break;
}
} else {
loop->lowerType = LOOP_BOUND_INDETERMINATE;
break;
}
}
}
if (loop->lowerType == LOOP_BOUND_INDETERMINATE)
loop->lowerType = LOOP_BOUND_VARIABLE;
if (loop->lowerType == LOOP_BOUND_CONSTANT && loop->upperType == LOOP_BOUND_CONSTANT) {
if (op12 == PC_CMP || op12 == PC_CMPI) {
if (!checklooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, &loop->iterationCount))
return;
} else {
if (!checkunsignedlooplimits(lastpcode->op, reg4, loop->lower, loop->upper, loop->step, (UInt32 *) &loop->iterationCount))
return;
}
loop->isKnownCountingLoop = 1;
} else if (loop->lowerType != LOOP_BOUND_INDETERMINATE || loop->upperType != LOOP_BOUND_INDETERMINATE) {
if (!checkunknownloop(lastpcode->op, reg4, loop->step, &loop->unknownCondition))
return;
loop->isUnknownCountingLoop = 1;
}
}
void analyzeForCountableLoops(Loop *loop) {
if (!loop)
return;
while (loop) {
if (loop->children)
analyzeForCountableLoops(loop->children);
checkcountingloop(loop);
loop = loop->nextSibling;
}
}
void analyzeloop(Loop *loop) {
BlockList *list;
PCodeBlock *block;
PCode *pcode;
loop->bodySize = 0;
loop->x4D = 0;
loop->x4E = 0;
loop->x4F = 1;
loop->isKnownCountingLoop = 0;
loop->isUnknownCountingLoop = 0;
loop->lowerType = LOOP_BOUND_INDETERMINATE;
loop->upperType = LOOP_BOUND_INDETERMINATE;
loop->iterationCount = -1;
loop->x57 = 0;
loop->x52 = 0;
for (list = loop->blocks; list; list = list->next) {
block = list->block;
if (!loop->children)
block->flags |= fPCBlockFlag2000;
loop->bodySize += block->pcodeCount;
if (block != loop->body) {
if (!block->successors || !block->predecessors || block->successors->nextLink || block->predecessors->nextLink)
loop->x4F = 0;
}
if ((block->flags & fPCBlockFlag4000) == fPCBlockFlag4000)
loop->x52 = 1;
for (pcode = block->firstPCode; pcode; pcode = pcode->nextPCode) {
if (PCODE_FLAG_SET_T(pcode) & fLink)
loop->x4D = 1;
if (pcode->op == PC_BCTRL || pcode->op == PC_BCTR || pcode->op == PC_BCCTR || pcode->op == PC_MTCTR || pcode->op == PC_MFCTR) {
loop->x4E = 1;
} else if (pcode->flags & fIsRead) {
if (pcode->op == PC_LBZX || pcode->op == PC_LHZX || pcode->op == PC_LHAX || pcode->op == PC_LWZX || pcode->op == PC_LFSX || pcode->op == PC_LFDX)
loop->x53 = 1;
} else if (pcode->flags & fIsWrite) {
if (pcode->op == PC_STBX || pcode->op == PC_STHX || pcode->op == PC_STWX || pcode->op == PC_STFSX || pcode->op == PC_STFDX)
loop->x54 = 1;
} else {
if (pcode->op == PC_EIEIO || pcode->op == PC_SYNC || pcode->op == PC_ISYNC)
loop->x57 = 1;
}
}
}
if (!loop->children && !loop->x4D && loop->bodySize < 32) {
for (list = loop->blocks; list; list = list->next)
list->block->flags |= fPCBlockFlag2000;
}
}
static void analyzeloops(Loop *loop) {
while (loop) {
if (loop->children)
analyzeloops(loop->children);
analyzeloop(loop);
loop = loop->nextSibling;
}
}
void analyzeloopsinflowgraph(void) {
if (loopsinflowgraph)
analyzeloops(loopsinflowgraph);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,751 @@
#include "compiler/StrengthReduction.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/LoopDetection.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/Registers.h"
#include "compiler/UseDefChains.h"
int strengthreducedloops;
static PCode *findinitializer(Loop *loop, short reg) {
UInt32 *vec;
PCode *best;
RegUseOrDef *list;
vec = usedefinfo[loop->body->blockIndex].defvec8;
best = NULL;
for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
if (
!(bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, loop->memberblocks)) &&
bitvectorgetbit(list->id, vec)
)
{
if (best)
return NULL;
best = Defs[list->id].pcode;
}
}
if (best) {
if (best->op == PC_LI || best->op == PC_ADDI || best->op == PC_ADD)
return best;
}
return NULL;
}
static int isbasicinductionvariable(Loop *loop, short reg, SInt32 step) {
RegUseOrDef *list;
PCode *instr;
for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
instr = Defs[list->id].pcode;
if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) {
if (instr->op != PC_ADDI)
return 0;
if (instr->args[1].data.reg.reg != reg)
return 0;
if (instr->args[2].data.imm.value != step)
return 0;
}
}
return 1;
}
static void addbasicinductionvariable(Loop *loop, short reg, SInt32 step) {
BasicInductionVar *biv;
RegUseOrDef *list;
PCode *instr;
InstrList *instrList;
for (biv = loop->basicInductionVars; biv; biv = biv->next) {
if (biv->reg == reg)
return;
}
biv = oalloc(sizeof(BasicInductionVar));
biv->next = loop->basicInductionVars;
loop->basicInductionVars = biv;
biv->loop = loop;
biv->inductionVars = NULL;
biv->instrsC = NULL;
biv->step = step;
biv->reg = reg;
for (list = reg_Defs[RegClass_GPR][reg]; list; list = list->next) {
instr = Defs[list->id].pcode;
if (bitvectorgetbit(instr->block->blockIndex, loop->memberblocks)) {
instrList = oalloc(sizeof(InstrList));
instrList->next = biv->instrsC;
biv->instrsC = instrList;
instrList->instr = instr;
}
}
biv->initializer = findinitializer(loop, reg);
}
static void findbasicinductionvariables(Loop *loop) {
SInt16 step;
BlockList *block;
PCode *instr;
short reg;
for (block = loop->blocks; block; block = block->next) {
for (instr = block->block->firstPCode; instr; instr = instr->nextPCode) {
if (instr->op == PC_ADDI) {
if (
(reg = instr->args[0].data.reg.reg) >= 32 &&
instr->args[1].data.reg.reg == reg &&
isbasicinductionvariable(loop, reg, step = instr->args[2].data.imm.value)
)
addbasicinductionvariable(loop, reg, step);
}
}
}
}
static void findallbasicinductionvariables(Loop *loop) {
while (loop) {
if (loop->children)
findallbasicinductionvariables(loop->children);
findbasicinductionvariables(loop);
loop = loop->nextSibling;
}
}
static int isinductionvariable(BasicInductionVar *biv, int useID, SInt32 *result1, short *result2, short *result3, Loop **result4) {
RegUseOrDef *list;
int counter;
Loop *loop;
Loop *scanloop;
PCode *instr;
instr = Uses[useID].pcode;
*result2 = 0;
*result3 = 0;
*result4 = NULL;
switch (instr->op) {
case PC_MULLI:
*result1 = instr->args[2].data.imm.value;
break;
case PC_RLWINM:
if (instr->args[3].data.imm.value)
return 0;
if (instr->args[2].data.imm.value > 15)
return 0;
if (instr->args[4].data.imm.value != (31 - instr->args[2].data.imm.value))
return 0;
if (PCODE_FLAG_SET_F(instr) & fRecordBit)
return 0;
*result1 = 1 << instr->args[2].data.imm.value;
break;
case PC_LBZX:
case PC_LHZX:
case PC_LHAX:
case PC_LWZX:
case PC_STBX:
case PC_STHX:
case PC_STWX:
case PC_LFSX:
case PC_LFDX:
case PC_STFSX:
case PC_STFDX:
*result2 = 0;
*result3 = 0;
if (instr->args[1].data.reg.reg == biv->reg) {
*result2 = 1;
*result3 = 2;
} else if (instr->args[2].data.reg.reg == biv->reg) {
*result2 = 2;
*result3 = 1;
}
counter = 0;
for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks))
counter++;
}
if (counter)
return 0;
loop = biv->loop;
for (scanloop = loop->parent; scanloop; scanloop = scanloop->parent) {
counter = 0;
for (list = reg_Defs[RegClass_GPR][instr->args[*result3].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, scanloop->memberblocks))
counter++;
}
if (!biv->initializer || bitvectorgetbit(biv->initializer->block->blockIndex, scanloop->memberblocks))
counter++;
if (counter)
break;
loop = scanloop;
}
*result4 = loop;
*result1 = 1;
return 1;
default:
return 0;
}
counter = 0;
for (list = reg_Defs[RegClass_GPR][instr->args[0].data.reg.reg]; list; list = list->next) {
if (bitvectorgetbit(Defs[list->id].pcode->block->blockIndex, biv->loop->memberblocks))
counter++;
}
return counter == 1;
}
static void addinductionvariable(BasicInductionVar *biv, PCode *instr, SInt32 val1, short val2, short val3, Loop *val4) {
InductionVar *iv;
iv = oalloc(sizeof(InductionVar));
iv->next = biv->inductionVars;
biv->inductionVars = iv;
iv->basicVar = biv;
iv->instr = instr;
iv->instrC = NULL;
iv->step = val1;
iv->x18 = val2;
iv->x1A = val3;
iv->someloop = val4;
if (instr->flags & (fIsRead | fIsWrite))
iv->x1C = -1;
else
iv->x1C = instr->args[0].data.reg.reg;
iv->x1E = -1;
}
static void findnonbasicinductionvariables(Loop *loop) {
BasicInductionVar *biv;
RegUseOrDef *list;
SInt32 result1;
short result2;
short result3;
Loop *result4;
for (biv = loop->basicInductionVars; biv; biv = biv->next) {
for (list = reg_Uses[RegClass_GPR][biv->reg]; list; list = list->next) {
if (bitvectorgetbit(Uses[list->id].pcode->block->blockIndex, loop->memberblocks)) {
if (isinductionvariable(biv, list->id, &result1, &result2, &result3, &result4))
addinductionvariable(biv, Uses[list->id].pcode, result1, result2, result3, result4);
}
}
}
}
static void findallnonbasicinductionvariables(Loop *loop) {
while (loop) {
if (loop->children)
findallnonbasicinductionvariables(loop->children);
if (loop->basicInductionVars)
findnonbasicinductionvariables(loop);
loop = loop->nextSibling;
}
}
static void initializeinductionvariable(InductionVar *iv) {
BasicInductionVar *biv; // r31
PCode *instr; // r27
PCodeBlock *preheader; // r30
SInt32 value30; // r30
short reg29; // r29
short reg26; // r26
biv = iv->basicVar;
preheader = biv->loop->preheader;
if (iv->x1A) {
reg29 = iv->instr->args[iv->x1A].data.reg.reg;
reg26 = iv->instr->args[iv->x18].data.reg.reg;
instr = NULL;
if (
biv->initializer &&
biv->initializer->op == PC_LI &&
biv->initializer->block == preheader
)
{
if (biv->initializer->args[1].data.imm.value == 0)
instr = makepcode(PC_MR, iv->x1E, reg29);
else if (FITS_IN_SHORT(biv->initializer->args[1].data.imm.value))
instr = makepcode(PC_ADDI, iv->x1E, reg29, 0, biv->initializer->args[1].data.imm.value);
}
if (!instr)
instr = makepcode(PC_ADD, iv->x1E, reg29, reg26);
if (biv->initializer && instr->op != PC_ADD)
insertpcodeafter(biv->initializer, instr);
else if (iv->someloop && iv->someloop->preheader->lastPCode)
insertpcodebefore(iv->someloop->preheader->lastPCode, instr);
else
insertpcodebefore(preheader->lastPCode, instr);
iv->instrC = instr;
iv->x1C = reg29;
return;
}
if (!biv->initializer || biv->initializer->op != PC_LI) {
instr = copypcode(iv->instr);
instr->args[0].data.reg.reg = iv->x1E;
insertpcodebefore(preheader->lastPCode, instr);
} else {
value30 = biv->initializer->args[1].data.imm.value * iv->step;
if (!FITS_IN_SHORT(value30)) {
instr = makepcode(PC_LIS, iv->x1E, 0, HIGH_PART(value30));
insertpcodeafter(biv->initializer, instr);
if (value30 != 0)
insertpcodeafter(instr, makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value30)));
} else {
instr = makepcode(PC_LI, iv->x1E, value30);
insertpcodeafter(biv->initializer, instr);
}
}
}
static void incrementinductionvariable(InductionVar *iv) {
SInt32 value;
BasicInductionVar *biv;
PCode *instr;
InstrList *list;
biv = iv->basicVar;
value = iv->step * biv->step;
for (list = biv->instrsC; list; list = list->next) {
if (!FITS_IN_SHORT(value)) {
instr = makepcode(PC_ADDIS, iv->x1E, iv->x1E, 0, HIGH_PART(value));
insertpcodeafter(list->instr, instr);
if (value != 0) {
instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, LOW_PART(value));
insertpcodeafter(list->instr->nextPCode, instr);
}
} else {
instr = makepcode(PC_ADDI, iv->x1E, iv->x1E, 0, value);
insertpcodeafter(list->instr, instr);
}
}
}
static void copyinductionvariable(InductionVar *iv) {
if (iv->instr->flags & (fIsRead | fIsWrite)) {
iv->instr->op -= 2;
iv->instr->args[1].data.reg.reg = iv->x1E;
iv->instr->args[2].kind = PCOp_IMMEDIATE;
iv->instr->args[2].data.imm.value = 0;
iv->instr->args[2].data.imm.obj = NULL;
} else {
insertpcodeafter(iv->instr, makepcode(PC_MR, iv->x1C, iv->x1E));
deletepcode(iv->instr);
}
}
static int testnestediv(InductionVar *iv, SInt32 step1, int reg, SInt32 step2, Loop *loop1, Loop *loop2) {
SInt32 addend;
BlockList *list;
PCode *instr;
PCodeArg *op;
int i;
if (iv->instrC && iv->x1C == reg) {
if (iv->instrC->op == PC_MR)
addend = 0;
else if (iv->instrC->op == PC_ADDI)
addend = iv->instrC->args[2].data.imm.value;
else
return 0;
if (step2 == (addend + (step1 * iv->step * loop2->iterationCount))) {
for (list = loop1->blocks; list && list->block != loop2->blocks->block; list = list->next) {
for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == reg
)
return 0;
op++;
}
}
}
return 1;
}
}
return 0;
}
static void strengthreducenestediv(short reg, SInt32 step, PCode *initializer, Loop *loop) {
Loop *scanloop;
BasicInductionVar *biv;
InductionVar *iv;
PCode *instr;
PCodeArg *op;
int i;
for (scanloop = loop->children; scanloop; scanloop = scanloop->nextSibling) {
if (
scanloop->isKnownCountingLoop &&
scanloop->x4F &&
bitvectorgetbit(scanloop->body->blockIndex, loop->vec2C)
)
{
for (biv = scanloop->basicInductionVars; biv; biv = biv->next) {
for (iv = biv->inductionVars; iv; iv = iv->next) {
if (testnestediv(iv, biv->step, reg, step, loop, scanloop)) {
deletepcode(iv->instrC);
if (initializer) {
insertpcodeafter(initializer, iv->instrC);
} else if (loop->body->lastPCode) {
for (instr = loop->body->lastPCode; instr; instr = instr->prevPCode) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
(op->data.reg.effect & EffectWrite) &&
op->data.reg.reg == reg
)
break;
op++;
}
}
if (instr)
insertpcodeafter(instr, iv->instrC);
else
insertpcodebefore(loop->body->firstPCode, iv->instrC);
} else {
appendpcode(loop->body, iv->instrC);
}
}
}
}
}
}
}
static void strengthreducenestedbiv(BasicInductionVar *biv) {
Loop *loop;
InductionVar *iv;
loop = biv->loop;
for (iv = biv->inductionVars; iv; iv = iv->next)
strengthreducenestediv(iv->x1E, iv->step * biv->step, iv->instrC, loop);
strengthreducenestediv(biv->reg, biv->step, biv->initializer, loop);
}
static void strengthreduceinductionvariable(BasicInductionVar *biv) {
int counter;
InductionVar *iv;
InductionVar *otherIv;
short reg;
counter = 0;
for (iv = biv->inductionVars; iv; iv = iv->next) {
if (iv->step == 1)
counter++;
}
for (iv = biv->inductionVars; iv; iv = iv->next) {
if (
(counter <= 4 || iv->step != 1) &&
iv->instr->block &&
(iv->x1A == 0 || iv->instr->args[2].kind != PCOp_IMMEDIATE)
)
{
if (iv->x1E == -1) {
iv->x1E = used_virtual_registers[RegClass_GPR]++;
initializeinductionvariable(iv);
incrementinductionvariable(iv);
if (iv->step == 1) {
reg = iv->instr->args[iv->x1A].data.reg.reg;
for (otherIv = iv->next; otherIv; otherIv = otherIv->next) {
if (otherIv->x1A != 0 && otherIv->instr->args[otherIv->x1A].data.reg.reg == reg)
otherIv->x1E = iv->x1E;
}
} else {
for (otherIv = iv->next; otherIv; otherIv = otherIv->next) {
if (otherIv->step == iv->step)
otherIv->x1E = iv->x1E;
}
}
}
copyinductionvariable(iv);
strengthreducedloops = 1;
}
}
}
#ifdef __MWERKS__
#pragma options align=mac68k
#endif
typedef struct BivInit {
SInt32 x0;
short x4;
short x6;
short x8;
Object *xA;
} BivInit;
#ifdef __MWERKS__
#pragma options align=reset
#endif
static void calc_biv_init(BasicInductionVar *biv, BivInit *init) {
PCode *instr;
PCode *scan;
PCodeArg *op;
int i;
instr = biv->initializer;
init->x0 = 0;
init->x4 = -1;
init->x6 = -1;
init->x8 = 0;
init->xA = NULL;
if (!biv->initializer || (biv->initializer->op != PC_ADDI && biv->initializer->op != PC_ADD))
return;
if (instr->op == PC_ADDI) {
if (instr->args[1].data.reg.reg == biv->reg) {
init->x0 = instr->args[2].data.imm.value;
for (scan = instr->prevPCode; scan; scan = scan->prevPCode) {
op = scan->args;
i = scan->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == biv->reg &&
(op->data.reg.effect & EffectWrite)
)
{
if (scan->op == PC_ADD) {
init->x4 = scan->args[1].data.reg.reg;
init->x6 = scan->args[2].data.reg.reg;
} else if (scan->op == PC_ADDI) {
if (scan->args[2].kind == PCOp_IMMEDIATE) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.imm.value;
} else if (scan->args[2].kind == PCOp_MEMORY) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.mem.offset;
init->xA = scan->args[2].data.mem.obj;
}
}
return;
}
op++;
}
}
} else {
if (instr->args[2].kind == PCOp_IMMEDIATE) {
init->x4 = instr->args[1].data.reg.reg;
init->x8 = instr->args[2].data.imm.value;
} else if (instr->args[2].kind == PCOp_MEMORY) {
init->x4 = instr->args[1].data.reg.reg;
init->x8 = instr->args[2].data.mem.offset;
init->xA = instr->args[2].data.mem.obj;
}
}
} else if (instr->op == PC_ADD) {
if (instr->args[1].data.reg.reg == biv->reg) {
init->x6 = instr->args[2].data.reg.reg;
for (scan = instr->prevPCode; scan; scan = scan->prevPCode) {
op = scan->args;
i = scan->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == biv->reg &&
(op->data.reg.effect & EffectWrite) &&
scan->op == PC_ADDI
)
{
if (scan->args[2].kind == PCOp_IMMEDIATE) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.imm.value;
} else if (scan->args[2].kind == PCOp_MEMORY) {
init->x4 = scan->args[1].data.reg.reg;
init->x8 = scan->args[2].data.mem.offset;
init->xA = scan->args[2].data.mem.obj;
}
return;
}
op++;
}
}
} else {
init->x4 = instr->args[1].data.reg.reg;
init->x6 = instr->args[2].data.reg.reg;
}
}
}
static void combineinductionvariables(Loop *loop, BasicInductionVar *biv1, BasicInductionVar *biv2, SInt32 difference) {
PCode *instr1; // r31
int reg1; // r30
int reg2; // r29
PCode *instr2; // r24
PCodeBlock *nextBlock; // r24
BlockList *list;
PCodeArg *op;
int i;
PCode *instr;
instr1 = NULL;
instr2 = NULL;
reg1 = biv1->reg;
CError_ASSERT(930, reg1 >= 0);
reg2 = biv2->reg;
CError_ASSERT(934, reg2 >= 0);
if (!FITS_IN_SHORT(difference))
return;
for (list = loop->blocks; list; list = list->next) {
for (instr = list->block->firstPCode; instr; instr = instr->nextPCode) {
if (instr1) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_REGISTER &&
op->arg == RegClass_GPR &&
op->data.reg.reg == reg1
)
return;
op++;
}
}
if (instr->op == PC_ADDI) {
if (instr->args[0].data.reg.reg == reg1) {
if (instr1)
return;
instr1 = instr;
} else if (instr->args[0].data.reg.reg == reg2) {
if (instr2)
return;
instr2 = instr;
}
}
}
}
if (loop->body->lastPCode->flags & fIsBranch) {
nextBlock = NULL;
for (i = 0; i < loop->body->lastPCode->argCount; i++) {
if (loop->body->lastPCode->args[i].kind == PCOp_LABEL) {
nextBlock = loop->body->lastPCode->args[i].data.label.label->block;
break;
}
}
if (!nextBlock)
return;
} else {
nextBlock = loop->body->nextBlock;
}
deletepcode(instr1);
instr1->args[1].data.reg.reg = reg2;
instr1->args[2].data.imm.value = difference;
if (nextBlock->firstPCode)
insertpcodebefore(nextBlock->firstPCode, instr1);
else
appendpcode(nextBlock, instr1);
biv1->reg = -1;
strengthreducedloops = 1;
}
static void strengthreduceinductionvariables(Loop *loop) {
BasicInductionVar *biv1;
BasicInductionVar *biv2;
BivInit init1;
BivInit init2;
for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) {
if (biv1->inductionVars)
strengthreduceinductionvariable(biv1);
strengthreducenestedbiv(biv1);
}
for (biv1 = loop->basicInductionVars; biv1; biv1 = biv1->next) {
if (biv1->reg != -1) {
calc_biv_init(biv1, &init1);
if (init1.x4 != -1) {
for (biv2 = loop->basicInductionVars; biv2; biv2 = biv2->next) {
if (biv2->reg != -1 && biv2 != biv1) {
calc_biv_init(biv2, &init2);
if (
init2.x4 != -1 &&
init1.x4 == init2.x4 &&
init1.x6 == init2.x6 &&
init1.x8 == init2.x8 &&
init1.xA == init2.xA &&
biv1->step == biv2->step
)
{
if (init1.x0 < init2.x0) {
combineinductionvariables(loop, biv2, biv1, init2.x0 - init1.x0);
} else {
combineinductionvariables(loop, biv1, biv2, init1.x0 - init2.x0);
break;
}
}
}
}
}
}
}
}
static void strengthreduceallinductionvariables(Loop *loop) {
while (loop) {
if (loop->children)
strengthreduceallinductionvariables(loop->children);
if (loop->basicInductionVars)
strengthreduceinductionvariables(loop);
loop = loop->nextSibling;
}
}
void strengthreduceloops(void) {
strengthreducedloops = 0;
if (loopsinflowgraph) {
computeusedefchains(0);
findallbasicinductionvariables(loopsinflowgraph);
findallnonbasicinductionvariables(loopsinflowgraph);
strengthreduceallinductionvariables(loopsinflowgraph);
freeoheap();
}
}

View File

@@ -0,0 +1,661 @@
#include "compiler/ValueNumbering.h"
#include "compiler/Alias.h"
#include "compiler/PCode.h"
#include "compiler/Registers.h"
#include "compiler/RegisterInfo.h"
#include "compiler/CompilerTools.h"
#include "compiler/CError.h"
typedef struct ValueLabel {
struct ValueLabel *next;
PCodeArg op;
} ValueLabel;
typedef struct AvailableValue {
struct AvailableValue *next;
ValueLabel *labelled;
PCode *pcode;
int killedregister;
int aliasnumber;
int opnumbers[0];
} AvailableValue;
typedef struct RegValue {
int number;
int x4;
AvailableValue *available;
} RegValue;
typedef struct State {
void *stackedvalues;
int valueceiling;
} State;
typedef struct StackedValue {
struct StackedValue *next;
PCodeArg op;
RegValue value;
Alias *alias;
PCode *valuepcode;
} StackedValue;
int removedcommonsubexpressions;
int nextvaluenumber;
static AvailableValue *opvalue[428];
static RegValue *regvalue[RegClassMax];
static StackedValue *stackedvalues;
static int valueceiling;
static int moreaggressiveoptimization;
static void allocatecsedatastructures(void) {
char rclass;
for (rclass = 0; rclass < RegClassMax; rclass++)
regvalue[(char) rclass] = oalloc(sizeof(RegValue) * used_virtual_registers[(char) rclass]);
}
static void initializecsedatastructures(void) {
RegValue *rv;
char rclass;
int i;
nextvaluenumber = 0;
for (i = 0; i < 428; i++)
opvalue[i] = NULL;
for (rclass = 0; rclass < RegClassMax; rclass++) {
rv = regvalue[(char) rclass];
for (i = 0; i < used_virtual_registers[(char) rclass]; i++, rv++) {
rv->number = nextvaluenumber++;
rv->x4 = 0;
rv->available = NULL;
}
}
initialize_alias_values();
stackedvalues = NULL;
valueceiling = 0x7FFFFFFF;
}
static void labelvalue(AvailableValue *av, PCodeArg *op) {
ValueLabel *label = oalloc(sizeof(ValueLabel));
label->op = *op;
label->next = av->labelled;
av->labelled = label;
}
static void unlabelvalue(AvailableValue *av, PCodeArg *op) {
ValueLabel *labelled;
ValueLabel **ptr;
ptr = &av->labelled;
while ((labelled = *ptr)) {
if (labelled->op.data.reg.reg == op->data.reg.reg)
*ptr = labelled->next;
else
ptr = &labelled->next;
}
}
static void stackregistervalue(PCodeArg *op, RegValue *value) {
StackedValue *stacked = oalloc(sizeof(StackedValue));
stacked->next = stackedvalues;
stackedvalues = stacked;
stacked->op = *op;
stacked->value = *value;
}
static void stackmemoryvalue(Alias *alias) {
StackedValue *stacked = oalloc(sizeof(StackedValue));
stacked->next = stackedvalues;
stackedvalues = stacked;
stacked->op.kind = PCOp_MEMORY;
stacked->alias = alias;
stacked->value.number = alias->valuenumber;
stacked->valuepcode = alias->valuepcode;
}
static void unstackvalue(StackedValue *stacked) {
PCodeArg *op = &stacked->op;
RegValue *value;
if (stacked->op.kind == PCOp_MEMORY) {
stacked->alias->valuenumber = stacked->value.number;
stacked->alias->valuepcode = stacked->valuepcode;
} else {
value = &regvalue[op->arg][op->data.reg.reg];
if (value->available)
unlabelvalue(value->available, op);
value->number = stacked->value.number;
value->x4 = stacked->value.x4;
value->available = stacked->value.available;
if (value->available)
labelvalue(value->available, op);
}
}
static int samevalue(PCodeArg *op1, PCodeArg *op2) {
return regvalue[op1->arg][op1->data.reg.reg].number == regvalue[op2->arg][op2->data.reg.reg].number;
}
static int killregister(PCodeArg *op) {
RegValue *value;
value = &regvalue[op->arg][op->data.reg.reg];
if (value->number < valueceiling && nextvaluenumber >= valueceiling)
stackregistervalue(op, value);
if (value->available)
unlabelvalue(value->available, op);
value->available = NULL;
value->x4 = 0;
return value->number = nextvaluenumber++;
}
void killmemory(Alias *alias, PCode *newValue) {
if (alias->valuenumber < valueceiling && nextvaluenumber >= valueceiling)
stackmemoryvalue(alias);
if (newValue) {
alias->valuenumber = regvalue[newValue->args[0].arg][newValue->args[0].data.reg.reg].number;
alias->valuepcode = newValue;
} else {
alias->valuenumber = nextvaluenumber++;
alias->valuepcode = NULL;
}
}
static void killspecificCSEs(short op) {
AvailableValue *av;
ValueLabel *labelled;
for (av = opvalue[op]; av; av = av->next) {
for (labelled = av->labelled; labelled; labelled = labelled->next)
killregister(&labelled->op);
}
}
static void killallCSEs(void) {
AvailableValue *av;
ValueLabel *labelled;
int i;
for (i = 0; i < 428; i++) {
for (av = opvalue[i]; av; av = av->next) {
for (labelled = av->labelled; labelled; labelled = labelled->next)
killregister(&labelled->op);
}
}
}
static void killregisters(PCode *pcode) {
PCodeArg *op;
int i;
for (i = 0, op = pcode->args; i < pcode->argCount; i++, op++) {
if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite))
killregister(op);
}
}
static void copyregister(PCodeArg *src, PCodeArg *dest) {
RegValue *srcvalue;
RegValue *destvalue;
srcvalue = &regvalue[src->arg][src->data.reg.reg];
destvalue = &regvalue[dest->arg][dest->data.reg.reg];
if (destvalue->number < valueceiling && nextvaluenumber >= valueceiling)
stackregistervalue(dest, destvalue);
if (destvalue->available)
unlabelvalue(destvalue->available, dest);
destvalue->available = srcvalue->available;
if (destvalue->available)
labelvalue(destvalue->available, dest);
destvalue->number = srcvalue->number;
if (srcvalue->x4 && srcvalue->number == regvalue[src->arg][srcvalue->x4].number)
destvalue->x4 = srcvalue->x4;
else
destvalue->x4 = src->data.reg.reg;
}
static int matchvalues(AvailableValue *av, PCode *match) {
PCodeArg *avOp;
PCodeArg *matchOp;
int i;
for (avOp = &av->pcode->args[0], matchOp = &match->args[0], i = 0; i < match->argCount; i++, avOp++, matchOp++) {
if (i != 0) {
switch (avOp->kind) {
case PCOp_REGISTER:
if (av->opnumbers[i] != regvalue[matchOp->arg][matchOp->data.reg.reg].number)
return 0;
break;
case PCOp_MEMORY:
if (matchOp->kind != PCOp_MEMORY)
return 0;
if (matchOp->data.mem.obj != avOp->data.mem.obj)
return 0;
if (matchOp->data.mem.offset != avOp->data.mem.offset)
return 0;
if ((unsigned char) matchOp->arg != (unsigned char) avOp->arg)
return 0;
break;
case PCOp_IMMEDIATE:
if (matchOp->kind != PCOp_IMMEDIATE)
return 0;
if (matchOp->data.imm.value != avOp->data.imm.value)
return 0;
break;
case PCOp_LABEL:
if (matchOp->kind != PCOp_LABEL)
return 0;
if (matchOp->data.label.label != avOp->data.label.label)
return 0;
break;
case PCOp_SYSREG:
CError_FATAL(572);
}
}
}
if ((match->flags & (fIsRead | fPCodeFlag20000)) && match->alias->valuenumber != av->aliasnumber)
return 0;
return 1;
}
static void chooselocation(AvailableValue *av, PCodeArg *op) {
ValueLabel *labelled;
PCodeArg *baseop;
baseop = &av->pcode->args[0];
labelled = av->labelled;
while (labelled) {
if (labelled->op.data.reg.reg == baseop->data.reg.reg) {
*op = labelled->op;
return;
}
labelled = labelled->next;
}
*op = av->labelled[0].op;
}
static int findavailablevalue(PCode *pcode, PCodeArg *op) {
AvailableValue *av;
PCodeArg tmp1;
PCodeArg tmp2;
for (av = opvalue[pcode->op]; av; av = av->next) {
if (av->labelled && av->pcode->flags == pcode->flags && av->pcode->argCount == pcode->argCount) {
if (!matchvalues(av, pcode)) {
if (!(pcode->flags & fCommutative))
continue;
tmp1 = pcode->args[1];
pcode->args[1] = pcode->args[2];
pcode->args[2] = tmp1;
if (!matchvalues(av, pcode)) {
tmp2 = pcode->args[1];
pcode->args[1] = pcode->args[2];
pcode->args[2] = tmp2;
continue;
}
}
chooselocation(av, op);
return 1;
}
}
return 0;
}
static void addavailablevalue(PCode *pcode) {
AvailableValue *av;
PCodeArg *op;
int i;
av = oalloc(sizeof(AvailableValue) + sizeof(int) * pcode->argCount);
av->labelled = NULL;
av->pcode = pcode;
for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
if (op->kind == PCOp_REGISTER)
av->opnumbers[i] = regvalue[op->arg][op->data.reg.reg].number;
}
if (pcode->flags & (fIsRead | fPCodeFlag20000))
av->aliasnumber = pcode->alias->valuenumber;
op = &pcode->args[0];
av->killedregister = killregister(op);
labelvalue(av, op);
regvalue[op->arg][op->data.reg.reg].available = av;
av->next = opvalue[pcode->op];
opvalue[pcode->op] = av;
}
static int isCSEop(PCode *pcode) {
PCodeArg *baseOp;
PCodeArg *op;
int i;
baseOp = &pcode->args[0];
switch (pcode->op) {
case PC_CMPI:
case PC_CMP:
case PC_CMPLI:
case PC_CMPL:
case PC_FCMPU:
case PC_FCMPO:
if (!moreaggressiveoptimization)
return 0;
break;
case PC_LI:
case PC_LIS:
if (!moreaggressiveoptimization)
return 0;
if (pcode->args[0].data.reg.reg < first_fe_temporary_register[RegClass_GPR] || pcode->args[0].data.reg.reg > last_temporary_register[RegClass_GPR])
return 0;
break;
}
if (PCODE_FLAG_SET_F(pcode) & (fIsVolatile | fSideEffects | fOverflow | fSetsCarry | fRecordBit))
return 0;
for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
if (op != baseOp &&
op->kind == baseOp->kind &&
op->arg == baseOp->arg &&
op->data.reg.reg == baseOp->data.reg.reg)
return 0;
}
return 1;
}
static int isCSEload(PCode *pcode) {
PCodeArg *op;
int i;
int count;
count = 0;
for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
if (op->kind == PCOp_REGISTER && (op->data.reg.effect & EffectWrite))
count++;
}
return count == 1;
}
static void registercopy(PCode *pcode) {
PCodeArg *op1;
PCodeArg *op2;
op1 = &pcode->args[0];
op2 = &pcode->args[1];
if (samevalue(op2, op1))
deletepcode(pcode);
else
copyregister(op2, op1);
}
static PCode *recentlystored(Alias *alias, PCodeArg *op) {
PCode *pc;
if ((pc = alias->valuepcode) && alias->valuenumber == regvalue[pc->args[0].arg][pc->args[0].data.reg.reg].number) {
*op = pc->args[0];
return pc;
} else {
return NULL;
}
}
static void simpleload(PCode *pcode) {
PCodeArg *origOp;
PCodeArg op;
PCode *rs;
origOp = &pcode->args[0];
if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) {
killregisters(pcode);
return;
}
if (findavailablevalue(pcode, &op)) {
if (!samevalue(origOp, &op)) {
insertpcodebefore(pcode, makecopyinstruction(&op, origOp));
copyregister(&op, origOp);
}
deletepcode(pcode);
removedcommonsubexpressions = 1;
} else if ((rs = recentlystored(pcode->alias, &op)) && can_reuse_stored_value(rs, pcode)) {
if (!samevalue(origOp, &op)) {
insertpcodebefore(pcode, makecopyinstruction(&op, origOp));
copyregister(&op, origOp);
}
deletepcode(pcode);
removedcommonsubexpressions = 1;
} else {
addavailablevalue(pcode);
}
}
static void simplestore(PCode *pcode) {
update_alias_value(pcode->alias, pcode);
killregisters(pcode);
}
static void pointerload(PCode *pcode) {
PCodeArg *op;
PCodeArg buf;
op = &pcode->args[0];
if ((pcode->flags & fIsVolatile) || !isCSEload(pcode)) {
killregisters(pcode);
return;
}
if (findavailablevalue(pcode, &buf)) {
if (!samevalue(op, &buf)) {
insertpcodebefore(pcode, makecopyinstruction(&buf, op));
copyregister(&buf, op);
}
deletepcode(pcode);
removedcommonsubexpressions = 1;
} else {
addavailablevalue(pcode);
}
}
static void pointerstore(PCode *pcode) {
update_alias_value(pcode->alias, NULL);
killregisters(pcode);
}
static void arithmeticop(PCode *pcode) {
PCodeArg *op;
PCodeArg buf;
op = &pcode->args[0];
if (findavailablevalue(pcode, &buf)) {
if (!samevalue(op, &buf)) {
insertpcodebefore(pcode, makecopyinstruction(&buf, op));
copyregister(&buf, op);
}
deletepcode(pcode);
removedcommonsubexpressions = 1;
} else {
addavailablevalue(pcode);
}
}
static void functioncall(PCode *pcode) {
killregisters(pcode);
if (coloring) {
update_all_alias_values();
killallCSEs();
} else {
update_alias_value(pcode->alias, NULL);
}
}
static void operatefrommemory(PCode *pcode) {
CError_FATAL(980);
}
static void operatetomemory(PCode *pcode) {
CError_FATAL(1011);
}
static void propagatecopiesto(PCode *pcode) {
PCodeArg *op;
int i;
for (i = 0, op = &pcode->args[0]; i < pcode->argCount; i++, op++) {
if (
op->kind == PCOp_REGISTER &&
(op->data.reg.effect & (EffectRead | EffectWrite | Effect8)) == EffectRead &&
op->data.reg.reg >= n_real_registers[op->arg] &&
regvalue[op->arg][op->data.reg.reg].x4 &&
regvalue[op->arg][op->data.reg.reg].x4 >= n_real_registers[op->arg] &&
regvalue[op->arg][op->data.reg.reg].number == regvalue[op->arg][regvalue[op->arg][op->data.reg.reg].x4].number
) {
op->data.reg.reg = regvalue[op->arg][op->data.reg.reg].x4;
}
}
}
static void removecsesfrombasicblock(PCodeBlock *block) {
PCode *pcode;
PCode *next;
for (pcode = block->firstPCode; pcode; pcode = next) {
next = pcode->nextPCode;
propagatecopiesto(pcode);
if (pcode->flags & fIsMove) {
registercopy(pcode);
} else if ((pcode->flags & fIsCall) && (pcode->flags & (fLink | fSideEffects))) {
functioncall(pcode);
} else if (pcode->flags & fIsRead) {
if (pcode->flags & fIsPtrOp)
pointerload(pcode);
else
simpleload(pcode);
} else if (pcode->flags & fIsWrite) {
if (pcode->flags & fIsPtrOp)
pointerstore(pcode);
else
simplestore(pcode);
} else if (pcode->flags & fPCodeFlag20000) {
operatefrommemory(pcode);
} else if (pcode->flags & fPCodeFlag40000) {
operatetomemory(pcode);
} else if ((pcode->flags & fIsCSE) && isCSEop(pcode)) {
arithmeticop(pcode);
} else {
killregisters(pcode);
}
}
block->flags |= fVisited;
}
static void getvaluestate(State *state) {
state->stackedvalues = stackedvalues;
state->valueceiling = valueceiling;
}
static void setvaluestate(State *state) {
stackedvalues = state->stackedvalues;
valueceiling = state->valueceiling;
}
static void forkvaluestate(int number) {
stackedvalues = NULL;
valueceiling = number;
}
static void regressvaluestate(void) {
AvailableValue *av;
AvailableValue **ptr;
int i;
StackedValue *stacked;
for (i = 0; i < 428; i++) {
ptr = &opvalue[i];
while ((av = *ptr)) {
if (av->killedregister >= valueceiling)
*ptr = av->next;
else
ptr = &av->next;
}
}
for (stacked = stackedvalues; stacked; stacked = stacked->next)
unstackvalue(stacked);
}
static void removecsesfromextendedbasicblock(PCodeBlock *block) {
PCLink *succ;
int counter;
State state;
removecsesfrombasicblock(block);
while (block->successors &&
!block->successors->nextLink &&
block->successors->block->predecessors &&
!block->successors->block->predecessors->nextLink) {
block = block->successors->block;
removecsesfrombasicblock(block);
}
counter = 0;
for (succ = block->successors; succ; succ = succ->nextLink) {
if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink)
counter++;
}
if (counter) {
getvaluestate(&state);
forkvaluestate(nextvaluenumber);
for (succ = block->successors; succ; succ = succ->nextLink) {
if (!(succ->block->flags & fVisited) && succ->block->predecessors && !succ->block->predecessors->nextLink) {
removecsesfromextendedbasicblock(succ->block);
regressvaluestate();
}
}
setvaluestate(&state);
}
}
void removecommonsubexpressions(Object *proc, int flag) {
PCodeBlock *block;
moreaggressiveoptimization = flag;
removedcommonsubexpressions = 0;
gather_alias_info();
allocatecsedatastructures();
for (block = pcbasicblocks; block; block = block->nextBlock)
block->flags &= ~fVisited;
for (block = pcbasicblocks; block; block = block->nextBlock) {
if (!(block->flags & fVisited)) {
initializecsedatastructures();
removecsesfromextendedbasicblock(block);
}
}
freeoheap();
}

View File

@@ -0,0 +1,548 @@
#include "compiler/VectorArraysToRegs.h"
#include "compiler/CError.h"
#include "compiler/CFunc.h"
#include "compiler/BitVectors.h"
#include "compiler/CompilerTools.h"
#include "compiler/PCode.h"
#include "compiler/PCodeInfo.h"
#include "compiler/Registers.h"
#include "compiler/UseDefChains.h"
#include "compiler/objects.h"
#include "compiler/types.h"
typedef struct LocalVectorArray {
struct LocalVectorArray *next;
Object *object;
unsigned int invalid:1;
SInt32 arraySize;
SInt32 elementCount;
int totalUses;
int elements[1];
} LocalVectorArray;
typedef struct VectorPropInfo {
UInt32 *use;
UInt32 *def;
UInt32 *in;
UInt32 *out;
} VectorPropInfo;
typedef struct ADDI {
PCode *instr;
RegUseOrDef *list;
} ADDI;
static int number_of_ADDIs;
static ADDI *ADDIs;
static VectorPropInfo *vectorpropinfo;
static int *naddsinblock;
static int *firstaddinblock;
static Boolean converted_arrays;
static LocalVectorArray *scanforlocalvectorarrays(void) {
SInt32 elementCount;
LocalVectorArray *head;
LocalVectorArray *array;
ObjectList *list;
int i;
SInt32 arraySize;
head = NULL;
for (list = locals; list; list = list->next) {
if (
list->object &&
!(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
list->object->type &&
IS_TYPE_ARRAY(list->object->type) &&
IS_TYPE_VECTOR(TPTR_TARGET(list->object->type))
) {
arraySize = list->object->type->size;
elementCount = arraySize / 16;
if (elementCount > 0 && elementCount <= 8) {
array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray));
array->next = head;
head = array;
array->object = list->object;
array->arraySize = arraySize;
array->elementCount = elementCount;
array->totalUses = 0;
array->invalid = 0;
for (i = 0; i < elementCount; i++) {
array->elements[i] = 0;
}
}
}
}
return head;
}
static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) {
while (arrays) {
if (arrays->object == object)
return arrays;
arrays = arrays->next;
}
return NULL;
}
static void scaninstructions(LocalVectorArray *arrays) {
PCodeBlock *block;
PCode *instr;
int counter;
int i;
PCodeArg *op;
LocalVectorArray *array;
int element;
naddsinblock = oalloc(sizeof(int) * pcblockcount);
memclrw(naddsinblock, sizeof(int) * pcblockcount);
firstaddinblock = oalloc(sizeof(int) * pcblockcount);
memclrw(firstaddinblock, sizeof(int) * pcblockcount);
number_of_ADDIs = 0;
for (block = pcbasicblocks; block; block = block->nextBlock) {
firstaddinblock[block->blockIndex] = number_of_ADDIs;
counter = 0;
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
op = instr->args;
i = instr->argCount;
while (i--) {
if (
op->kind == PCOp_MEMORY &&
(PCOpMemoryArg) op->arg == PCOpMemory1 &&
(array = lookup_vector_array_object(arrays, op->data.mem.obj)) &&
!array->invalid
)
{
if (instr->op != PC_ADDI) {
array->invalid = 1;
} else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) {
array->invalid = 1;
} else {
number_of_ADDIs++;
counter++;
}
if (!array->invalid) {
element = op->data.mem.offset / 16;
if (element < array->elementCount)
array->elements[element]++;
else
array->invalid = 1;
}
}
op++;
}
}
}
naddsinblock[block->blockIndex] = counter;
}
}
static void computeaddilist(LocalVectorArray *arrays) {
PCodeBlock *block;
PCode *instr;
RegUseOrDef *list;
ADDI *addi;
UInt32 *vec;
LocalVectorArray *array;
UseOrDef *def;
int defID;
UseOrDef *use;
int useID;
ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs);
memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs);
vec = oalloc(4 * ((number_of_Uses + 31) >> 5));
for (block = pcbasicblocks; block; block = block->nextBlock) {
if (naddsinblock[block->blockIndex]) {
bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses);
addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1];
for (instr = block->lastPCode; instr; instr = instr->prevPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
int reg; // r18
if (
instr->op == PC_ADDI &&
(reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] &&
instr->args[2].kind == PCOp_MEMORY &&
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
!array->invalid
)
{
addi->instr = instr;
addi->list = NULL;
for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) {
if (bitvectorgetbit(list->id, vec)) {
RegUseOrDef *node = oalloc(sizeof(RegUseOrDef));
node->id = list->id;
node->next = addi->list;
addi->list = node;
}
}
addi--;
}
for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) {
if (def->v.kind == PCOp_REGISTER) {
RegUseOrDef *l;
for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next)
bitvectorclearbit(l->id, vec);
}
}
for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) {
if (use->v.kind == PCOp_REGISTER)
bitvectorsetbit(useID, vec);
}
}
}
}
}
}
static void allocatevectorpropinfo(void) {
VectorPropInfo *info;
int i;
vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount);
for (i = 0, info = vectorpropinfo; i < pcblockcount; i++, info++) {
info->use = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
info->def = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
info->in = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
info->out = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
}
}
static void computelocalvectorpropinfo(LocalVectorArray *arrays) {
VectorPropInfo *info;
PCodeBlock *block;
PCode *instr;
UInt32 *vec0;
UInt32 *vec4;
int index;
PCodeArg *op;
int i;
int addi_i;
ADDI *addi;
LocalVectorArray *array;
for (block = pcbasicblocks; block; block = block->nextBlock) {
info = &vectorpropinfo[block->blockIndex];
vec0 = info->use;
vec4 = info->def;
bitvectorinitialize(vec0, number_of_ADDIs, 0);
bitvectorinitialize(vec4, number_of_ADDIs, 0);
index = firstaddinblock[block->blockIndex];
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
if (!(instr->flags & fIsBranch) && instr->argCount) {
i = instr->argCount;
op = instr->args;
while (i--) {
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) {
for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) {
if (
addi->instr &&
addi->instr->args[0].arg == op->arg &&
addi->instr->args[0].data.reg.reg == op->data.reg.reg
)
{
if (addi->instr->block == block)
bitvectorclearbit(addi_i, vec0);
else
bitvectorsetbit(addi_i, vec4);
}
}
}
op++;
}
if (
instr->op == PC_ADDI &&
instr->args[2].kind == PCOp_MEMORY &&
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
!array->invalid
)
{
bitvectorsetbit(index, vec0);
index++;
}
}
}
}
}
static void computeglobalvectorpropinfo(void) {
VectorPropInfo *info;
PCodeBlock *block;
UInt32 *vec0;
UInt32 *vec4;
UInt32 *vec8;
UInt32 *vecC;
int bitvecsize;
int blockIndex;
int i;
int j;
int flag;
PCLink *preds;
UInt32 val;
bitvecsize = (number_of_ADDIs + 31) >> 5;
flag = 1;
info = &vectorpropinfo[pcbasicblocks->blockIndex];
bitvectorinitialize(info->in, number_of_ADDIs, 0);
bitvectorcopy(info->out, info->use, number_of_ADDIs);
for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
info = &vectorpropinfo[block->blockIndex];
vecC = info->out;
vec4 = info->def;
for (i = 0; i < bitvecsize; vecC++, vec4++, i++)
*vecC = ~*vec4;
}
while (flag) {
flag = 0;
for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) {
if (depthfirstordering[blockIndex]) {
info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex];
if ((preds = depthfirstordering[blockIndex]->predecessors)) {
vec8 = info->in;
bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
for (preds = preds->nextLink; preds; preds = preds->nextLink)
bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
}
vecC = info->out;
vec8 = info->in;
vec0 = info->use;
vec4 = info->def;
for (j = 0; j < bitvecsize; j++) {
val = *vec0 | (*vec8 & ~*vec4);
if (val != *vecC) {
*vecC = val;
flag = 1;
}
vec8++;
vecC++;
vec4++;
vec0++;
}
}
}
}
}
static int precedes(PCode *a, PCode *b) {
PCode *scan;
for (scan = a->nextPCode; scan; scan = scan->nextPCode) {
if (scan == b)
return 1;
}
return 0;
}
static int checkvectorstoreorload(int addiID, int useID) {
PCode *addiInstr;
UseOrDef *use;
addiInstr = ADDIs[addiID].instr;
use = Uses + useID;
if (!addiInstr)
return 0;
if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR])
return 0;
if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX)
return 0;
if (
use->pcode->args[1].kind != PCOp_REGISTER ||
use->pcode->args[1].arg != RegClass_GPR ||
use->pcode->args[1].data.reg.reg != 0
)
return 0;
return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg;
}
static int checkalluses(LocalVectorArray *arrays, int addiID) {
RegUseOrDef *list;
PCode *instr;
LocalVectorArray *array;
instr = ADDIs[addiID].instr;
for (list = ADDIs[addiID].list; list; list = list->next) {
if (list && !checkvectorstoreorload(addiID, list->id)) {
array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj);
array->invalid = 1;
return 0;
}
}
return 1;
}
static void convert_array_to_register(LocalVectorArray *arrays, int addiID) {
ADDI *addi;
int newReg;
RegUseOrDef *list;
PCode *instr;
PCode *useInstr;
LocalVectorArray *array;
int element;
addi = ADDIs + addiID;
if (!(instr = addi->instr))
return;
if (
!(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) ||
array->invalid
)
return;
element = instr->args[2].data.mem.offset / 16;
if (element > array->elementCount)
return;
newReg = array->elements[element];
for (list = addi->list; list; list = list->next) {
useInstr = Uses[list->id].pcode;
if (useInstr->op == PC_LVX) {
converted_arrays = 1;
change_opcode(useInstr, PC_VMR);
change_num_operands(useInstr, 2);
useInstr->args[1].kind = PCOp_REGISTER;
useInstr->args[1].arg = RegClass_VR;
useInstr->args[1].data.reg.reg = newReg;
useInstr->args[1].data.reg.effect = EffectRead;
} else if (useInstr->op == PC_STVX) {
converted_arrays = 1;
change_opcode(useInstr, PC_VMR);
change_num_operands(useInstr, 2);
useInstr->args[1] = useInstr->args[0];
useInstr->args[0].kind = PCOp_REGISTER;
useInstr->args[0].arg = RegClass_VR;
useInstr->args[0].data.reg.reg = newReg;
useInstr->args[0].data.reg.effect = EffectWrite;
} else {
CError_FATAL(661);
}
}
deletepcode(addi->instr);
}
static void convert_arrays_to_registers(LocalVectorArray *arrays) {
int i;
int counter;
LocalVectorArray **ptr;
LocalVectorArray *array;
for (i = 0; i < number_of_ADDIs; i++)
checkalluses(arrays, i);
counter = 0;
ptr = &arrays;
array = *ptr;
while (array) {
if (array->invalid) {
*ptr = array->next;
array = *ptr;
continue;
}
counter += array->elementCount;
for (i = 0; i < array->elementCount; i++)
array->totalUses += array->elements[i];
array = array->next;
}
if (arrays) {
while (counter > 32) {
LocalVectorArray *best;
int score;
score = 0;
best = NULL;
for (array = arrays; array; array = array->next) {
if (best) {
if (array->totalUses < score) {
score = array->totalUses;
best = array;
}
} else {
best = array;
score = array->totalUses;
}
}
if (!best)
break;
if (best == arrays) {
arrays = best->next;
} else {
for (array = arrays; array; array = array->next) {
if (array->next == best) {
array->next = best->next;
break;
}
}
}
counter -= best->elementCount;
}
if (!(array = arrays))
return;
while (array) {
for (i = 0; i < array->elementCount; i++)
array->elements[i] = used_virtual_registers[RegClass_VR]++;
array = array->next;
}
if (arrays) {
for (i = 0; i < number_of_ADDIs; i++)
convert_array_to_register(arrays, i);
}
}
}
int vectorarraystoregs(void) {
LocalVectorArray *arrays;
converted_arrays = 0;
if ((arrays = scanforlocalvectorarrays())) {
scaninstructions(arrays);
if (number_of_ADDIs > 0) {
computeusedefchains(0);
computeaddilist(arrays);
allocatevectorpropinfo();
computelocalvectorpropinfo(arrays);
computedepthfirstordering();
computeglobalvectorpropinfo();
convert_arrays_to_registers(arrays);
}
}
freeoheap();
return converted_arrays;
}