mirror of https://git.wuffs.org/MWCC
549 lines
17 KiB
C
549 lines
17 KiB
C
#include "compiler/VectorArraysToRegs.h"
|
|
#include "compiler/CError.h"
|
|
#include "compiler/CFunc.h"
|
|
#include "compiler/BitVectors.h"
|
|
#include "compiler/CompilerTools.h"
|
|
#include "compiler/PCode.h"
|
|
#include "compiler/PCodeInfo.h"
|
|
#include "compiler/Registers.h"
|
|
#include "compiler/UseDefChains.h"
|
|
#include "compiler/objects.h"
|
|
#include "compiler/types.h"
|
|
|
|
typedef struct LocalVectorArray {
|
|
struct LocalVectorArray *next;
|
|
Object *object;
|
|
unsigned int invalid:1;
|
|
SInt32 arraySize;
|
|
SInt32 elementCount;
|
|
int totalUses;
|
|
int elements[1];
|
|
} LocalVectorArray;
|
|
|
|
typedef struct VectorPropInfo {
|
|
UInt32 *use;
|
|
UInt32 *def;
|
|
UInt32 *in;
|
|
UInt32 *out;
|
|
} VectorPropInfo;
|
|
|
|
typedef struct ADDI {
|
|
PCode *instr;
|
|
RegUseOrDef *list;
|
|
} ADDI;
|
|
|
|
static int number_of_ADDIs;
|
|
static ADDI *ADDIs;
|
|
static VectorPropInfo *vectorpropinfo;
|
|
static int *naddsinblock;
|
|
static int *firstaddinblock;
|
|
static Boolean converted_arrays;
|
|
|
|
static LocalVectorArray *scanforlocalvectorarrays(void) {
|
|
SInt32 elementCount;
|
|
LocalVectorArray *head;
|
|
LocalVectorArray *array;
|
|
ObjectList *list;
|
|
int i;
|
|
SInt32 arraySize;
|
|
|
|
head = NULL;
|
|
|
|
for (list = locals; list; list = list->next) {
|
|
if (
|
|
list->object &&
|
|
!(IS_TYPE_POINTER(list->object->type) ? (TPTR_QUAL(list->object->type) & Q_VOLATILE) : (list->object->qual & Q_VOLATILE)) &&
|
|
list->object->type &&
|
|
IS_TYPE_ARRAY(list->object->type) &&
|
|
IS_TYPE_VECTOR(TPTR_TARGET(list->object->type))
|
|
) {
|
|
arraySize = list->object->type->size;
|
|
elementCount = arraySize / 16;
|
|
if (elementCount > 0 && elementCount <= 8) {
|
|
array = oalloc(sizeof(int) * (elementCount - 1) + sizeof(LocalVectorArray));
|
|
array->next = head;
|
|
head = array;
|
|
|
|
array->object = list->object;
|
|
array->arraySize = arraySize;
|
|
array->elementCount = elementCount;
|
|
array->totalUses = 0;
|
|
array->invalid = 0;
|
|
|
|
for (i = 0; i < elementCount; i++) {
|
|
array->elements[i] = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return head;
|
|
}
|
|
|
|
static LocalVectorArray *lookup_vector_array_object(LocalVectorArray *arrays, Object *object) {
|
|
while (arrays) {
|
|
if (arrays->object == object)
|
|
return arrays;
|
|
arrays = arrays->next;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static void scaninstructions(LocalVectorArray *arrays) {
|
|
PCodeBlock *block;
|
|
PCode *instr;
|
|
int counter;
|
|
int i;
|
|
PCodeArg *op;
|
|
LocalVectorArray *array;
|
|
int element;
|
|
|
|
naddsinblock = oalloc(sizeof(int) * pcblockcount);
|
|
memclrw(naddsinblock, sizeof(int) * pcblockcount);
|
|
|
|
firstaddinblock = oalloc(sizeof(int) * pcblockcount);
|
|
memclrw(firstaddinblock, sizeof(int) * pcblockcount);
|
|
|
|
number_of_ADDIs = 0;
|
|
|
|
for (block = pcbasicblocks; block; block = block->nextBlock) {
|
|
firstaddinblock[block->blockIndex] = number_of_ADDIs;
|
|
counter = 0;
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (!(instr->flags & fIsBranch) && instr->argCount) {
|
|
op = instr->args;
|
|
i = instr->argCount;
|
|
while (i--) {
|
|
if (
|
|
op->kind == PCOp_MEMORY &&
|
|
(PCOpMemoryArg) op->arg == PCOpMemory1 &&
|
|
(array = lookup_vector_array_object(arrays, op->data.mem.obj)) &&
|
|
!array->invalid
|
|
)
|
|
{
|
|
if (instr->op != PC_ADDI) {
|
|
array->invalid = 1;
|
|
} else if (instr->args[0].data.reg.reg < n_real_registers[RegClass_GPR]) {
|
|
array->invalid = 1;
|
|
} else {
|
|
number_of_ADDIs++;
|
|
counter++;
|
|
}
|
|
|
|
if (!array->invalid) {
|
|
element = op->data.mem.offset / 16;
|
|
if (element < array->elementCount)
|
|
array->elements[element]++;
|
|
else
|
|
array->invalid = 1;
|
|
}
|
|
}
|
|
op++;
|
|
}
|
|
}
|
|
}
|
|
naddsinblock[block->blockIndex] = counter;
|
|
}
|
|
}
|
|
|
|
static void computeaddilist(LocalVectorArray *arrays) {
|
|
PCodeBlock *block;
|
|
PCode *instr;
|
|
RegUseOrDef *list;
|
|
ADDI *addi;
|
|
UInt32 *vec;
|
|
LocalVectorArray *array;
|
|
UseOrDef *def;
|
|
int defID;
|
|
UseOrDef *use;
|
|
int useID;
|
|
|
|
ADDIs = oalloc(sizeof(ADDI) * number_of_ADDIs);
|
|
memclrw(ADDIs, sizeof(ADDI) * number_of_ADDIs);
|
|
|
|
vec = oalloc(4 * ((number_of_Uses + 31) >> 5));
|
|
|
|
for (block = pcbasicblocks; block; block = block->nextBlock) {
|
|
if (naddsinblock[block->blockIndex]) {
|
|
bitvectorcopy(vec, usedefinfo[block->blockIndex].usevec1C, number_of_Uses);
|
|
addi = &ADDIs[firstaddinblock[block->blockIndex] + naddsinblock[block->blockIndex] - 1];
|
|
for (instr = block->lastPCode; instr; instr = instr->prevPCode) {
|
|
if (!(instr->flags & fIsBranch) && instr->argCount) {
|
|
int reg; // r18
|
|
if (
|
|
instr->op == PC_ADDI &&
|
|
(reg = instr->args[0].data.reg.reg) >= n_real_registers[RegClass_GPR] &&
|
|
instr->args[2].kind == PCOp_MEMORY &&
|
|
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
|
|
(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
|
|
!array->invalid
|
|
)
|
|
{
|
|
addi->instr = instr;
|
|
addi->list = NULL;
|
|
for (list = reg_Uses[RegClass_GPR][reg]; list; list = list->next) {
|
|
if (bitvectorgetbit(list->id, vec)) {
|
|
RegUseOrDef *node = oalloc(sizeof(RegUseOrDef));
|
|
node->id = list->id;
|
|
node->next = addi->list;
|
|
addi->list = node;
|
|
}
|
|
}
|
|
addi--;
|
|
}
|
|
|
|
for (def = &Defs[defID = instr->defID]; defID < number_of_Defs && def->pcode == instr; def++, defID++) {
|
|
if (def->v.kind == PCOp_REGISTER) {
|
|
RegUseOrDef *l;
|
|
for (l = reg_Uses[def->v.arg][def->v.u.reg]; l; l = l->next)
|
|
bitvectorclearbit(l->id, vec);
|
|
}
|
|
}
|
|
|
|
for (use = &Uses[useID = instr->useID]; useID < number_of_Uses && use->pcode == instr; use++, useID++) {
|
|
if (use->v.kind == PCOp_REGISTER)
|
|
bitvectorsetbit(useID, vec);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void allocatevectorpropinfo(void) {
|
|
VectorPropInfo *info;
|
|
int i;
|
|
|
|
vectorpropinfo = oalloc(sizeof(VectorPropInfo) * pcblockcount);
|
|
for (i = 0, info = vectorpropinfo; i < pcblockcount; i++, info++) {
|
|
info->use = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
|
|
info->def = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
|
|
info->in = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
|
|
info->out = oalloc(4 * ((number_of_ADDIs + 31) >> 5));
|
|
}
|
|
}
|
|
|
|
static void computelocalvectorpropinfo(LocalVectorArray *arrays) {
|
|
VectorPropInfo *info;
|
|
PCodeBlock *block;
|
|
PCode *instr;
|
|
UInt32 *vec0;
|
|
UInt32 *vec4;
|
|
int index;
|
|
PCodeArg *op;
|
|
int i;
|
|
int addi_i;
|
|
ADDI *addi;
|
|
LocalVectorArray *array;
|
|
|
|
for (block = pcbasicblocks; block; block = block->nextBlock) {
|
|
info = &vectorpropinfo[block->blockIndex];
|
|
vec0 = info->use;
|
|
vec4 = info->def;
|
|
bitvectorinitialize(vec0, number_of_ADDIs, 0);
|
|
bitvectorinitialize(vec4, number_of_ADDIs, 0);
|
|
index = firstaddinblock[block->blockIndex];
|
|
|
|
for (instr = block->firstPCode; instr; instr = instr->nextPCode) {
|
|
if (!(instr->flags & fIsBranch) && instr->argCount) {
|
|
i = instr->argCount;
|
|
op = instr->args;
|
|
while (i--) {
|
|
if (op->kind == PCOp_REGISTER && op->arg == RegClass_GPR && (op->data.reg.effect & EffectWrite)) {
|
|
for (addi_i = 0, addi = ADDIs; addi_i < number_of_ADDIs; addi_i++, addi++) {
|
|
if (
|
|
addi->instr &&
|
|
addi->instr->args[0].arg == op->arg &&
|
|
addi->instr->args[0].data.reg.reg == op->data.reg.reg
|
|
)
|
|
{
|
|
if (addi->instr->block == block)
|
|
bitvectorclearbit(addi_i, vec0);
|
|
else
|
|
bitvectorsetbit(addi_i, vec4);
|
|
}
|
|
}
|
|
}
|
|
op++;
|
|
}
|
|
|
|
if (
|
|
instr->op == PC_ADDI &&
|
|
instr->args[2].kind == PCOp_MEMORY &&
|
|
(PCOpMemoryArg) instr->args[2].arg == PCOpMemory1 &&
|
|
(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) &&
|
|
!array->invalid
|
|
)
|
|
{
|
|
bitvectorsetbit(index, vec0);
|
|
index++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void computeglobalvectorpropinfo(void) {
|
|
VectorPropInfo *info;
|
|
PCodeBlock *block;
|
|
UInt32 *vec0;
|
|
UInt32 *vec4;
|
|
UInt32 *vec8;
|
|
UInt32 *vecC;
|
|
int bitvecsize;
|
|
int blockIndex;
|
|
int i;
|
|
int j;
|
|
int flag;
|
|
PCLink *preds;
|
|
UInt32 val;
|
|
|
|
bitvecsize = (number_of_ADDIs + 31) >> 5;
|
|
flag = 1;
|
|
info = &vectorpropinfo[pcbasicblocks->blockIndex];
|
|
bitvectorinitialize(info->in, number_of_ADDIs, 0);
|
|
bitvectorcopy(info->out, info->use, number_of_ADDIs);
|
|
|
|
for (block = pcbasicblocks->nextBlock; block; block = block->nextBlock) {
|
|
info = &vectorpropinfo[block->blockIndex];
|
|
vecC = info->out;
|
|
vec4 = info->def;
|
|
for (i = 0; i < bitvecsize; vecC++, vec4++, i++)
|
|
*vecC = ~*vec4;
|
|
}
|
|
|
|
while (flag) {
|
|
flag = 0;
|
|
for (blockIndex = 0; blockIndex < pcblockcount; blockIndex++) {
|
|
if (depthfirstordering[blockIndex]) {
|
|
info = &vectorpropinfo[depthfirstordering[blockIndex]->blockIndex];
|
|
if ((preds = depthfirstordering[blockIndex]->predecessors)) {
|
|
vec8 = info->in;
|
|
bitvectorcopy(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
|
|
for (preds = preds->nextLink; preds; preds = preds->nextLink)
|
|
bitvectorintersect(vec8, vectorpropinfo[preds->block->blockIndex].out, number_of_ADDIs);
|
|
}
|
|
|
|
vecC = info->out;
|
|
vec8 = info->in;
|
|
vec0 = info->use;
|
|
vec4 = info->def;
|
|
for (j = 0; j < bitvecsize; j++) {
|
|
val = *vec0 | (*vec8 & ~*vec4);
|
|
if (val != *vecC) {
|
|
*vecC = val;
|
|
flag = 1;
|
|
}
|
|
vec8++;
|
|
vecC++;
|
|
vec4++;
|
|
vec0++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static int precedes(PCode *a, PCode *b) {
|
|
PCode *scan;
|
|
|
|
for (scan = a->nextPCode; scan; scan = scan->nextPCode) {
|
|
if (scan == b)
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int checkvectorstoreorload(int addiID, int useID) {
|
|
PCode *addiInstr;
|
|
UseOrDef *use;
|
|
|
|
addiInstr = ADDIs[addiID].instr;
|
|
use = Uses + useID;
|
|
if (!addiInstr)
|
|
return 0;
|
|
|
|
if (addiInstr->args[0].data.reg.reg < n_real_registers[RegClass_GPR])
|
|
return 0;
|
|
|
|
if (use->pcode->op != PC_LVX && use->pcode->op != PC_STVX)
|
|
return 0;
|
|
|
|
if (
|
|
use->pcode->args[1].kind != PCOp_REGISTER ||
|
|
use->pcode->args[1].arg != RegClass_GPR ||
|
|
use->pcode->args[1].data.reg.reg != 0
|
|
)
|
|
return 0;
|
|
|
|
return use->pcode->args[2].data.reg.reg == addiInstr->args[0].data.reg.reg;
|
|
}
|
|
|
|
static int checkalluses(LocalVectorArray *arrays, int addiID) {
|
|
RegUseOrDef *list;
|
|
PCode *instr;
|
|
LocalVectorArray *array;
|
|
|
|
instr = ADDIs[addiID].instr;
|
|
for (list = ADDIs[addiID].list; list; list = list->next) {
|
|
if (list && !checkvectorstoreorload(addiID, list->id)) {
|
|
array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj);
|
|
array->invalid = 1;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void convert_array_to_register(LocalVectorArray *arrays, int addiID) {
|
|
ADDI *addi;
|
|
int newReg;
|
|
RegUseOrDef *list;
|
|
PCode *instr;
|
|
PCode *useInstr;
|
|
LocalVectorArray *array;
|
|
int element;
|
|
|
|
addi = ADDIs + addiID;
|
|
|
|
if (!(instr = addi->instr))
|
|
return;
|
|
|
|
if (
|
|
!(array = lookup_vector_array_object(arrays, instr->args[2].data.mem.obj)) ||
|
|
array->invalid
|
|
)
|
|
return;
|
|
|
|
element = instr->args[2].data.mem.offset / 16;
|
|
if (element > array->elementCount)
|
|
return;
|
|
|
|
newReg = array->elements[element];
|
|
for (list = addi->list; list; list = list->next) {
|
|
useInstr = Uses[list->id].pcode;
|
|
if (useInstr->op == PC_LVX) {
|
|
converted_arrays = 1;
|
|
change_opcode(useInstr, PC_VMR);
|
|
change_num_operands(useInstr, 2);
|
|
useInstr->args[1].kind = PCOp_REGISTER;
|
|
useInstr->args[1].arg = RegClass_VR;
|
|
useInstr->args[1].data.reg.reg = newReg;
|
|
useInstr->args[1].data.reg.effect = EffectRead;
|
|
} else if (useInstr->op == PC_STVX) {
|
|
converted_arrays = 1;
|
|
change_opcode(useInstr, PC_VMR);
|
|
change_num_operands(useInstr, 2);
|
|
useInstr->args[1] = useInstr->args[0];
|
|
useInstr->args[0].kind = PCOp_REGISTER;
|
|
useInstr->args[0].arg = RegClass_VR;
|
|
useInstr->args[0].data.reg.reg = newReg;
|
|
useInstr->args[0].data.reg.effect = EffectWrite;
|
|
} else {
|
|
CError_FATAL(661);
|
|
}
|
|
}
|
|
deletepcode(addi->instr);
|
|
}
|
|
|
|
static void convert_arrays_to_registers(LocalVectorArray *arrays) {
|
|
int i;
|
|
int counter;
|
|
LocalVectorArray **ptr;
|
|
LocalVectorArray *array;
|
|
|
|
for (i = 0; i < number_of_ADDIs; i++)
|
|
checkalluses(arrays, i);
|
|
|
|
counter = 0;
|
|
ptr = &arrays;
|
|
array = *ptr;
|
|
while (array) {
|
|
if (array->invalid) {
|
|
*ptr = array->next;
|
|
array = *ptr;
|
|
continue;
|
|
}
|
|
|
|
counter += array->elementCount;
|
|
|
|
for (i = 0; i < array->elementCount; i++)
|
|
array->totalUses += array->elements[i];
|
|
|
|
array = array->next;
|
|
}
|
|
|
|
if (arrays) {
|
|
while (counter > 32) {
|
|
LocalVectorArray *best;
|
|
int score;
|
|
score = 0;
|
|
best = NULL;
|
|
for (array = arrays; array; array = array->next) {
|
|
if (best) {
|
|
if (array->totalUses < score) {
|
|
score = array->totalUses;
|
|
best = array;
|
|
}
|
|
} else {
|
|
best = array;
|
|
score = array->totalUses;
|
|
}
|
|
}
|
|
|
|
if (!best)
|
|
break;
|
|
|
|
if (best == arrays) {
|
|
arrays = best->next;
|
|
} else {
|
|
for (array = arrays; array; array = array->next) {
|
|
if (array->next == best) {
|
|
array->next = best->next;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
counter -= best->elementCount;
|
|
}
|
|
|
|
if (!(array = arrays))
|
|
return;
|
|
|
|
while (array) {
|
|
for (i = 0; i < array->elementCount; i++)
|
|
array->elements[i] = used_virtual_registers[RegClass_VR]++;
|
|
array = array->next;
|
|
}
|
|
|
|
if (arrays) {
|
|
for (i = 0; i < number_of_ADDIs; i++)
|
|
convert_array_to_register(arrays, i);
|
|
}
|
|
}
|
|
}
|
|
|
|
int vectorarraystoregs(void) {
|
|
LocalVectorArray *arrays;
|
|
|
|
converted_arrays = 0;
|
|
if ((arrays = scanforlocalvectorarrays())) {
|
|
scaninstructions(arrays);
|
|
if (number_of_ADDIs > 0) {
|
|
computeusedefchains(0);
|
|
computeaddilist(arrays);
|
|
allocatevectorpropinfo();
|
|
computelocalvectorpropinfo(arrays);
|
|
computedepthfirstordering();
|
|
computeglobalvectorpropinfo();
|
|
convert_arrays_to_registers(arrays);
|
|
}
|
|
}
|
|
|
|
freeoheap();
|
|
return converted_arrays;
|
|
}
|