1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2018 Valve Corporation
3*61046927SAndroid Build Coastguard Worker * Copyright © 2018 Google
4*61046927SAndroid Build Coastguard Worker *
5*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
6*61046927SAndroid Build Coastguard Worker */
7*61046927SAndroid Build Coastguard Worker
8*61046927SAndroid Build Coastguard Worker #include "aco_builder.h"
9*61046927SAndroid Build Coastguard Worker #include "aco_ir.h"
10*61046927SAndroid Build Coastguard Worker #include "aco_util.h"
11*61046927SAndroid Build Coastguard Worker
12*61046927SAndroid Build Coastguard Worker #include "common/ac_descriptors.h"
13*61046927SAndroid Build Coastguard Worker #include "common/sid.h"
14*61046927SAndroid Build Coastguard Worker
15*61046927SAndroid Build Coastguard Worker #include <algorithm>
16*61046927SAndroid Build Coastguard Worker #include <cstring>
17*61046927SAndroid Build Coastguard Worker #include <map>
18*61046927SAndroid Build Coastguard Worker #include <set>
19*61046927SAndroid Build Coastguard Worker #include <unordered_map>
20*61046927SAndroid Build Coastguard Worker #include <unordered_set>
21*61046927SAndroid Build Coastguard Worker #include <vector>
22*61046927SAndroid Build Coastguard Worker
23*61046927SAndroid Build Coastguard Worker namespace std {
24*61046927SAndroid Build Coastguard Worker template <> struct hash<aco::Temp> {
operator ()std::hash25*61046927SAndroid Build Coastguard Worker size_t operator()(aco::Temp temp) const noexcept
26*61046927SAndroid Build Coastguard Worker {
27*61046927SAndroid Build Coastguard Worker uint32_t v;
28*61046927SAndroid Build Coastguard Worker std::memcpy(&v, &temp, sizeof(temp));
29*61046927SAndroid Build Coastguard Worker return std::hash<uint32_t>{}(v);
30*61046927SAndroid Build Coastguard Worker }
31*61046927SAndroid Build Coastguard Worker };
32*61046927SAndroid Build Coastguard Worker } // namespace std
33*61046927SAndroid Build Coastguard Worker
34*61046927SAndroid Build Coastguard Worker /*
35*61046927SAndroid Build Coastguard Worker * Implements the spilling algorithm on SSA-form based on
36*61046927SAndroid Build Coastguard Worker * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
37*61046927SAndroid Build Coastguard Worker * by Matthias Braun and Sebastian Hack.
38*61046927SAndroid Build Coastguard Worker *
39*61046927SAndroid Build Coastguard Worker * Key difference between this algorithm and the min-algorithm from the paper
40*61046927SAndroid Build Coastguard Worker * is the use of average use distances rather than next-use distances per
41*61046927SAndroid Build Coastguard Worker * instruction.
42*61046927SAndroid Build Coastguard Worker * As we decrement the number of remaining uses, the average use distances
43*61046927SAndroid Build Coastguard Worker * give an approximation of the next-use distances while being computationally
44*61046927SAndroid Build Coastguard Worker * and memory-wise less expensive.
45*61046927SAndroid Build Coastguard Worker */
46*61046927SAndroid Build Coastguard Worker
47*61046927SAndroid Build Coastguard Worker namespace aco {
48*61046927SAndroid Build Coastguard Worker
49*61046927SAndroid Build Coastguard Worker namespace {
50*61046927SAndroid Build Coastguard Worker
51*61046927SAndroid Build Coastguard Worker struct remat_info {
52*61046927SAndroid Build Coastguard Worker Instruction* instr;
53*61046927SAndroid Build Coastguard Worker };
54*61046927SAndroid Build Coastguard Worker
55*61046927SAndroid Build Coastguard Worker struct loop_info {
56*61046927SAndroid Build Coastguard Worker uint32_t index;
57*61046927SAndroid Build Coastguard Worker aco::unordered_map<Temp, uint32_t> spills;
58*61046927SAndroid Build Coastguard Worker IDSet live_in;
59*61046927SAndroid Build Coastguard Worker };
60*61046927SAndroid Build Coastguard Worker
/* Use statistics of a single SSA value, used to rank spill candidates. */
struct use_info {
   /* Number of uses counted for the value. */
   uint32_t num_uses = 0;
   /* Largest instruction index at which the value is used. */
   uint32_t last_use = 0;

   /**
    * Returns last_use divided by num_uses; higher scores are preferred when
    * selecting a variable to spill.
    *
    * The division is performed on integers, so the quotient is truncated
    * before it is converted to float. A value without any uses scores 0,
    * which callers treat as "no candidate", instead of dividing by zero.
    */
   float score() const
   {
      if (num_uses == 0)
         return 0.0f;
      return static_cast<float>(last_use / num_uses);
   }
};
66*61046927SAndroid Build Coastguard Worker
67*61046927SAndroid Build Coastguard Worker struct spill_ctx {
68*61046927SAndroid Build Coastguard Worker RegisterDemand target_pressure;
69*61046927SAndroid Build Coastguard Worker Program* program;
70*61046927SAndroid Build Coastguard Worker aco::monotonic_buffer_resource memory;
71*61046927SAndroid Build Coastguard Worker
72*61046927SAndroid Build Coastguard Worker std::vector<aco::map<Temp, Temp>> renames;
73*61046927SAndroid Build Coastguard Worker std::vector<aco::unordered_map<Temp, uint32_t>> spills_entry;
74*61046927SAndroid Build Coastguard Worker std::vector<aco::unordered_map<Temp, uint32_t>> spills_exit;
75*61046927SAndroid Build Coastguard Worker
76*61046927SAndroid Build Coastguard Worker std::vector<bool> processed;
77*61046927SAndroid Build Coastguard Worker std::vector<loop_info> loop;
78*61046927SAndroid Build Coastguard Worker
79*61046927SAndroid Build Coastguard Worker std::vector<use_info> ssa_infos;
80*61046927SAndroid Build Coastguard Worker std::vector<std::pair<RegClass, std::unordered_set<uint32_t>>> interferences;
81*61046927SAndroid Build Coastguard Worker std::vector<std::vector<uint32_t>> affinities;
82*61046927SAndroid Build Coastguard Worker std::vector<bool> is_reloaded;
83*61046927SAndroid Build Coastguard Worker aco::unordered_map<Temp, remat_info> remat;
84*61046927SAndroid Build Coastguard Worker std::set<Instruction*> unused_remats;
85*61046927SAndroid Build Coastguard Worker unsigned wave_size;
86*61046927SAndroid Build Coastguard Worker
87*61046927SAndroid Build Coastguard Worker unsigned sgpr_spill_slots;
88*61046927SAndroid Build Coastguard Worker unsigned vgpr_spill_slots;
89*61046927SAndroid Build Coastguard Worker Temp scratch_rsrc;
90*61046927SAndroid Build Coastguard Worker
spill_ctxaco::__anon6dee26dc0111::spill_ctx91*61046927SAndroid Build Coastguard Worker spill_ctx(const RegisterDemand target_pressure_, Program* program_)
92*61046927SAndroid Build Coastguard Worker : target_pressure(target_pressure_), program(program_), memory(),
93*61046927SAndroid Build Coastguard Worker renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
94*61046927SAndroid Build Coastguard Worker spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
95*61046927SAndroid Build Coastguard Worker spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
96*61046927SAndroid Build Coastguard Worker processed(program->blocks.size(), false), ssa_infos(program->peekAllocationId()),
97*61046927SAndroid Build Coastguard Worker remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0)
98*61046927SAndroid Build Coastguard Worker {}
99*61046927SAndroid Build Coastguard Worker
add_affinityaco::__anon6dee26dc0111::spill_ctx100*61046927SAndroid Build Coastguard Worker void add_affinity(uint32_t first, uint32_t second)
101*61046927SAndroid Build Coastguard Worker {
102*61046927SAndroid Build Coastguard Worker unsigned found_first = affinities.size();
103*61046927SAndroid Build Coastguard Worker unsigned found_second = affinities.size();
104*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < affinities.size(); i++) {
105*61046927SAndroid Build Coastguard Worker std::vector<uint32_t>& vec = affinities[i];
106*61046927SAndroid Build Coastguard Worker for (uint32_t entry : vec) {
107*61046927SAndroid Build Coastguard Worker if (entry == first)
108*61046927SAndroid Build Coastguard Worker found_first = i;
109*61046927SAndroid Build Coastguard Worker else if (entry == second)
110*61046927SAndroid Build Coastguard Worker found_second = i;
111*61046927SAndroid Build Coastguard Worker }
112*61046927SAndroid Build Coastguard Worker }
113*61046927SAndroid Build Coastguard Worker if (found_first == affinities.size() && found_second == affinities.size()) {
114*61046927SAndroid Build Coastguard Worker affinities.emplace_back(std::vector<uint32_t>({first, second}));
115*61046927SAndroid Build Coastguard Worker } else if (found_first < affinities.size() && found_second == affinities.size()) {
116*61046927SAndroid Build Coastguard Worker affinities[found_first].push_back(second);
117*61046927SAndroid Build Coastguard Worker } else if (found_second < affinities.size() && found_first == affinities.size()) {
118*61046927SAndroid Build Coastguard Worker affinities[found_second].push_back(first);
119*61046927SAndroid Build Coastguard Worker } else if (found_first != found_second) {
120*61046927SAndroid Build Coastguard Worker /* merge second into first */
121*61046927SAndroid Build Coastguard Worker affinities[found_first].insert(affinities[found_first].end(),
122*61046927SAndroid Build Coastguard Worker affinities[found_second].begin(),
123*61046927SAndroid Build Coastguard Worker affinities[found_second].end());
124*61046927SAndroid Build Coastguard Worker affinities.erase(std::next(affinities.begin(), found_second));
125*61046927SAndroid Build Coastguard Worker } else {
126*61046927SAndroid Build Coastguard Worker assert(found_first == found_second);
127*61046927SAndroid Build Coastguard Worker }
128*61046927SAndroid Build Coastguard Worker }
129*61046927SAndroid Build Coastguard Worker
add_to_spillsaco::__anon6dee26dc0111::spill_ctx130*61046927SAndroid Build Coastguard Worker uint32_t add_to_spills(Temp to_spill, aco::unordered_map<Temp, uint32_t>& spills)
131*61046927SAndroid Build Coastguard Worker {
132*61046927SAndroid Build Coastguard Worker const uint32_t spill_id = allocate_spill_id(to_spill.regClass());
133*61046927SAndroid Build Coastguard Worker for (auto pair : spills)
134*61046927SAndroid Build Coastguard Worker add_interference(spill_id, pair.second);
135*61046927SAndroid Build Coastguard Worker if (!loop.empty()) {
136*61046927SAndroid Build Coastguard Worker for (auto pair : loop.back().spills)
137*61046927SAndroid Build Coastguard Worker add_interference(spill_id, pair.second);
138*61046927SAndroid Build Coastguard Worker }
139*61046927SAndroid Build Coastguard Worker
140*61046927SAndroid Build Coastguard Worker spills[to_spill] = spill_id;
141*61046927SAndroid Build Coastguard Worker return spill_id;
142*61046927SAndroid Build Coastguard Worker }
143*61046927SAndroid Build Coastguard Worker
add_interferenceaco::__anon6dee26dc0111::spill_ctx144*61046927SAndroid Build Coastguard Worker void add_interference(uint32_t first, uint32_t second)
145*61046927SAndroid Build Coastguard Worker {
146*61046927SAndroid Build Coastguard Worker if (interferences[first].first.type() != interferences[second].first.type())
147*61046927SAndroid Build Coastguard Worker return;
148*61046927SAndroid Build Coastguard Worker
149*61046927SAndroid Build Coastguard Worker bool inserted = interferences[first].second.insert(second).second;
150*61046927SAndroid Build Coastguard Worker if (inserted)
151*61046927SAndroid Build Coastguard Worker interferences[second].second.insert(first);
152*61046927SAndroid Build Coastguard Worker }
153*61046927SAndroid Build Coastguard Worker
allocate_spill_idaco::__anon6dee26dc0111::spill_ctx154*61046927SAndroid Build Coastguard Worker uint32_t allocate_spill_id(RegClass rc)
155*61046927SAndroid Build Coastguard Worker {
156*61046927SAndroid Build Coastguard Worker interferences.emplace_back(rc, std::unordered_set<uint32_t>());
157*61046927SAndroid Build Coastguard Worker is_reloaded.push_back(false);
158*61046927SAndroid Build Coastguard Worker return next_spill_id++;
159*61046927SAndroid Build Coastguard Worker }
160*61046927SAndroid Build Coastguard Worker
161*61046927SAndroid Build Coastguard Worker uint32_t next_spill_id = 0;
162*61046927SAndroid Build Coastguard Worker };
163*61046927SAndroid Build Coastguard Worker
164*61046927SAndroid Build Coastguard Worker /**
165*61046927SAndroid Build Coastguard Worker * Gathers information about the number of uses and point of last use
166*61046927SAndroid Build Coastguard Worker * per SSA value.
167*61046927SAndroid Build Coastguard Worker *
168*61046927SAndroid Build Coastguard Worker * Phi definitions are added to live-ins.
169*61046927SAndroid Build Coastguard Worker */
170*61046927SAndroid Build Coastguard Worker void
gather_ssa_use_info(spill_ctx & ctx)171*61046927SAndroid Build Coastguard Worker gather_ssa_use_info(spill_ctx& ctx)
172*61046927SAndroid Build Coastguard Worker {
173*61046927SAndroid Build Coastguard Worker unsigned instruction_idx = 0;
174*61046927SAndroid Build Coastguard Worker for (Block& block : ctx.program->blocks) {
175*61046927SAndroid Build Coastguard Worker for (int i = block.instructions.size() - 1; i >= 0; i--) {
176*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block.instructions[i];
177*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
178*61046927SAndroid Build Coastguard Worker if (op.isTemp()) {
179*61046927SAndroid Build Coastguard Worker use_info& info = ctx.ssa_infos[op.tempId()];
180*61046927SAndroid Build Coastguard Worker info.num_uses++;
181*61046927SAndroid Build Coastguard Worker info.last_use = std::max(info.last_use, instruction_idx + i);
182*61046927SAndroid Build Coastguard Worker }
183*61046927SAndroid Build Coastguard Worker }
184*61046927SAndroid Build Coastguard Worker }
185*61046927SAndroid Build Coastguard Worker
186*61046927SAndroid Build Coastguard Worker /* All live-in variables at loop headers get an additional artificial use.
187*61046927SAndroid Build Coastguard Worker * As we decrement the number of uses while processing the blocks, this
188*61046927SAndroid Build Coastguard Worker * ensures that the number of uses won't becomes zero before the loop
189*61046927SAndroid Build Coastguard Worker * (and the variables' live-ranges) end.
190*61046927SAndroid Build Coastguard Worker */
191*61046927SAndroid Build Coastguard Worker if (block.kind & block_kind_loop_header) {
192*61046927SAndroid Build Coastguard Worker for (unsigned t : ctx.program->live.live_in[block.index])
193*61046927SAndroid Build Coastguard Worker ctx.ssa_infos[t].num_uses++;
194*61046927SAndroid Build Coastguard Worker }
195*61046927SAndroid Build Coastguard Worker
196*61046927SAndroid Build Coastguard Worker instruction_idx += block.instructions.size();
197*61046927SAndroid Build Coastguard Worker }
198*61046927SAndroid Build Coastguard Worker }
199*61046927SAndroid Build Coastguard Worker
200*61046927SAndroid Build Coastguard Worker bool
should_rematerialize(aco_ptr<Instruction> & instr)201*61046927SAndroid Build Coastguard Worker should_rematerialize(aco_ptr<Instruction>& instr)
202*61046927SAndroid Build Coastguard Worker {
203*61046927SAndroid Build Coastguard Worker /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
204*61046927SAndroid Build Coastguard Worker if (instr->format != Format::VOP1 && instr->format != Format::SOP1 &&
205*61046927SAndroid Build Coastguard Worker instr->format != Format::PSEUDO && instr->format != Format::SOPK)
206*61046927SAndroid Build Coastguard Worker return false;
207*61046927SAndroid Build Coastguard Worker /* TODO: pseudo-instruction rematerialization is only supported for
208*61046927SAndroid Build Coastguard Worker * p_create_vector/p_parallelcopy */
209*61046927SAndroid Build Coastguard Worker if (instr->isPseudo() && instr->opcode != aco_opcode::p_create_vector &&
210*61046927SAndroid Build Coastguard Worker instr->opcode != aco_opcode::p_parallelcopy)
211*61046927SAndroid Build Coastguard Worker return false;
212*61046927SAndroid Build Coastguard Worker if (instr->isSOPK() && instr->opcode != aco_opcode::s_movk_i32)
213*61046927SAndroid Build Coastguard Worker return false;
214*61046927SAndroid Build Coastguard Worker
215*61046927SAndroid Build Coastguard Worker for (const Operand& op : instr->operands) {
216*61046927SAndroid Build Coastguard Worker /* TODO: rematerialization using temporaries isn't yet supported */
217*61046927SAndroid Build Coastguard Worker if (!op.isConstant())
218*61046927SAndroid Build Coastguard Worker return false;
219*61046927SAndroid Build Coastguard Worker }
220*61046927SAndroid Build Coastguard Worker
221*61046927SAndroid Build Coastguard Worker /* TODO: rematerialization with multiple definitions isn't yet supported */
222*61046927SAndroid Build Coastguard Worker if (instr->definitions.size() > 1)
223*61046927SAndroid Build Coastguard Worker return false;
224*61046927SAndroid Build Coastguard Worker
225*61046927SAndroid Build Coastguard Worker return true;
226*61046927SAndroid Build Coastguard Worker }
227*61046927SAndroid Build Coastguard Worker
228*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>
do_reload(spill_ctx & ctx,Temp tmp,Temp new_name,uint32_t spill_id)229*61046927SAndroid Build Coastguard Worker do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
230*61046927SAndroid Build Coastguard Worker {
231*61046927SAndroid Build Coastguard Worker std::unordered_map<Temp, remat_info>::iterator remat = ctx.remat.find(tmp);
232*61046927SAndroid Build Coastguard Worker if (remat != ctx.remat.end()) {
233*61046927SAndroid Build Coastguard Worker Instruction* instr = remat->second.instr;
234*61046927SAndroid Build Coastguard Worker assert((instr->isVOP1() || instr->isSOP1() || instr->isPseudo() || instr->isSOPK()) &&
235*61046927SAndroid Build Coastguard Worker "unsupported");
236*61046927SAndroid Build Coastguard Worker assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector ||
237*61046927SAndroid Build Coastguard Worker instr->opcode == aco_opcode::p_parallelcopy) &&
238*61046927SAndroid Build Coastguard Worker "unsupported");
239*61046927SAndroid Build Coastguard Worker assert(instr->definitions.size() == 1 && "unsupported");
240*61046927SAndroid Build Coastguard Worker
241*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> res;
242*61046927SAndroid Build Coastguard Worker res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(),
243*61046927SAndroid Build Coastguard Worker instr->definitions.size()));
244*61046927SAndroid Build Coastguard Worker if (instr->isSOPK())
245*61046927SAndroid Build Coastguard Worker res->salu().imm = instr->salu().imm;
246*61046927SAndroid Build Coastguard Worker
247*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < instr->operands.size(); i++) {
248*61046927SAndroid Build Coastguard Worker res->operands[i] = instr->operands[i];
249*61046927SAndroid Build Coastguard Worker if (instr->operands[i].isTemp()) {
250*61046927SAndroid Build Coastguard Worker assert(false && "unsupported");
251*61046927SAndroid Build Coastguard Worker if (ctx.remat.count(instr->operands[i].getTemp()))
252*61046927SAndroid Build Coastguard Worker ctx.unused_remats.erase(ctx.remat[instr->operands[i].getTemp()].instr);
253*61046927SAndroid Build Coastguard Worker }
254*61046927SAndroid Build Coastguard Worker }
255*61046927SAndroid Build Coastguard Worker res->definitions[0] = Definition(new_name);
256*61046927SAndroid Build Coastguard Worker return res;
257*61046927SAndroid Build Coastguard Worker } else {
258*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> reload{create_instruction(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
259*61046927SAndroid Build Coastguard Worker reload->operands[0] = Operand::c32(spill_id);
260*61046927SAndroid Build Coastguard Worker reload->definitions[0] = Definition(new_name);
261*61046927SAndroid Build Coastguard Worker ctx.is_reloaded[spill_id] = true;
262*61046927SAndroid Build Coastguard Worker return reload;
263*61046927SAndroid Build Coastguard Worker }
264*61046927SAndroid Build Coastguard Worker }
265*61046927SAndroid Build Coastguard Worker
266*61046927SAndroid Build Coastguard Worker void
get_rematerialize_info(spill_ctx & ctx)267*61046927SAndroid Build Coastguard Worker get_rematerialize_info(spill_ctx& ctx)
268*61046927SAndroid Build Coastguard Worker {
269*61046927SAndroid Build Coastguard Worker for (Block& block : ctx.program->blocks) {
270*61046927SAndroid Build Coastguard Worker bool logical = false;
271*61046927SAndroid Build Coastguard Worker for (aco_ptr<Instruction>& instr : block.instructions) {
272*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_logical_start)
273*61046927SAndroid Build Coastguard Worker logical = true;
274*61046927SAndroid Build Coastguard Worker else if (instr->opcode == aco_opcode::p_logical_end)
275*61046927SAndroid Build Coastguard Worker logical = false;
276*61046927SAndroid Build Coastguard Worker if (logical && should_rematerialize(instr)) {
277*61046927SAndroid Build Coastguard Worker for (const Definition& def : instr->definitions) {
278*61046927SAndroid Build Coastguard Worker if (def.isTemp()) {
279*61046927SAndroid Build Coastguard Worker ctx.remat[def.getTemp()] = remat_info{instr.get()};
280*61046927SAndroid Build Coastguard Worker ctx.unused_remats.insert(instr.get());
281*61046927SAndroid Build Coastguard Worker }
282*61046927SAndroid Build Coastguard Worker }
283*61046927SAndroid Build Coastguard Worker }
284*61046927SAndroid Build Coastguard Worker }
285*61046927SAndroid Build Coastguard Worker }
286*61046927SAndroid Build Coastguard Worker }
287*61046927SAndroid Build Coastguard Worker
288*61046927SAndroid Build Coastguard Worker RegisterDemand
init_live_in_vars(spill_ctx & ctx,Block * block,unsigned block_idx)289*61046927SAndroid Build Coastguard Worker init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
290*61046927SAndroid Build Coastguard Worker {
291*61046927SAndroid Build Coastguard Worker RegisterDemand spilled_registers;
292*61046927SAndroid Build Coastguard Worker
293*61046927SAndroid Build Coastguard Worker /* first block, nothing was spilled before */
294*61046927SAndroid Build Coastguard Worker if (block->linear_preds.empty())
295*61046927SAndroid Build Coastguard Worker return {0, 0};
296*61046927SAndroid Build Coastguard Worker
297*61046927SAndroid Build Coastguard Worker /* live-in variables at the beginning of the current block */
298*61046927SAndroid Build Coastguard Worker const IDSet& live_in = ctx.program->live.live_in[block_idx];
299*61046927SAndroid Build Coastguard Worker
300*61046927SAndroid Build Coastguard Worker /* loop header block */
301*61046927SAndroid Build Coastguard Worker if (block->kind & block_kind_loop_header) {
302*61046927SAndroid Build Coastguard Worker assert(block->linear_preds[0] == block_idx - 1);
303*61046927SAndroid Build Coastguard Worker assert(block->logical_preds[0] == block_idx - 1);
304*61046927SAndroid Build Coastguard Worker
305*61046927SAndroid Build Coastguard Worker /* check how many live-through variables should be spilled */
306*61046927SAndroid Build Coastguard Worker RegisterDemand reg_pressure = block->live_in_demand;
307*61046927SAndroid Build Coastguard Worker RegisterDemand loop_demand = reg_pressure;
308*61046927SAndroid Build Coastguard Worker unsigned i = block_idx;
309*61046927SAndroid Build Coastguard Worker while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth)
310*61046927SAndroid Build Coastguard Worker loop_demand.update(ctx.program->blocks[i++].register_demand);
311*61046927SAndroid Build Coastguard Worker
312*61046927SAndroid Build Coastguard Worker for (auto spilled : ctx.spills_exit[block_idx - 1]) {
313*61046927SAndroid Build Coastguard Worker /* variable is not live at loop entry: probably a phi operand */
314*61046927SAndroid Build Coastguard Worker if (!live_in.count(spilled.first.id()))
315*61046927SAndroid Build Coastguard Worker continue;
316*61046927SAndroid Build Coastguard Worker
317*61046927SAndroid Build Coastguard Worker /* keep live-through variables spilled */
318*61046927SAndroid Build Coastguard Worker ctx.spills_entry[block_idx][spilled.first] = spilled.second;
319*61046927SAndroid Build Coastguard Worker spilled_registers += spilled.first;
320*61046927SAndroid Build Coastguard Worker loop_demand -= spilled.first;
321*61046927SAndroid Build Coastguard Worker }
322*61046927SAndroid Build Coastguard Worker if (!ctx.loop.empty()) {
323*61046927SAndroid Build Coastguard Worker /* If this is a nested loop, keep variables from the outer loop spilled. */
324*61046927SAndroid Build Coastguard Worker for (auto spilled : ctx.loop.back().spills) {
325*61046927SAndroid Build Coastguard Worker /* If the inner loop comes after the last continue statement of the outer loop,
326*61046927SAndroid Build Coastguard Worker * the loop-carried variables might not be live-in for the inner loop.
327*61046927SAndroid Build Coastguard Worker */
328*61046927SAndroid Build Coastguard Worker if (live_in.count(spilled.first.id()) &&
329*61046927SAndroid Build Coastguard Worker ctx.spills_entry[block_idx].insert(spilled).second) {
330*61046927SAndroid Build Coastguard Worker spilled_registers += spilled.first;
331*61046927SAndroid Build Coastguard Worker loop_demand -= spilled.first;
332*61046927SAndroid Build Coastguard Worker }
333*61046927SAndroid Build Coastguard Worker }
334*61046927SAndroid Build Coastguard Worker }
335*61046927SAndroid Build Coastguard Worker
336*61046927SAndroid Build Coastguard Worker /* select more live-through variables and constants */
337*61046927SAndroid Build Coastguard Worker RegType type = RegType::vgpr;
338*61046927SAndroid Build Coastguard Worker while (loop_demand.exceeds(ctx.target_pressure)) {
339*61046927SAndroid Build Coastguard Worker /* if VGPR demand is low enough, select SGPRs */
340*61046927SAndroid Build Coastguard Worker if (type == RegType::vgpr && loop_demand.vgpr <= ctx.target_pressure.vgpr)
341*61046927SAndroid Build Coastguard Worker type = RegType::sgpr;
342*61046927SAndroid Build Coastguard Worker /* if SGPR demand is low enough, break */
343*61046927SAndroid Build Coastguard Worker if (type == RegType::sgpr && loop_demand.sgpr <= ctx.target_pressure.sgpr)
344*61046927SAndroid Build Coastguard Worker break;
345*61046927SAndroid Build Coastguard Worker
346*61046927SAndroid Build Coastguard Worker float score = 0.0;
347*61046927SAndroid Build Coastguard Worker unsigned remat = 0;
348*61046927SAndroid Build Coastguard Worker Temp to_spill;
349*61046927SAndroid Build Coastguard Worker for (unsigned t : live_in) {
350*61046927SAndroid Build Coastguard Worker Temp var = Temp(t, ctx.program->temp_rc[t]);
351*61046927SAndroid Build Coastguard Worker if (var.type() != type || ctx.spills_entry[block_idx].count(var) ||
352*61046927SAndroid Build Coastguard Worker var.regClass().is_linear_vgpr())
353*61046927SAndroid Build Coastguard Worker continue;
354*61046927SAndroid Build Coastguard Worker
355*61046927SAndroid Build Coastguard Worker unsigned can_remat = ctx.remat.count(var);
356*61046927SAndroid Build Coastguard Worker if (can_remat > remat || (can_remat == remat && ctx.ssa_infos[t].score() > score)) {
357*61046927SAndroid Build Coastguard Worker to_spill = var;
358*61046927SAndroid Build Coastguard Worker score = ctx.ssa_infos[t].score();
359*61046927SAndroid Build Coastguard Worker remat = can_remat;
360*61046927SAndroid Build Coastguard Worker }
361*61046927SAndroid Build Coastguard Worker }
362*61046927SAndroid Build Coastguard Worker
363*61046927SAndroid Build Coastguard Worker /* select SGPRs or break */
364*61046927SAndroid Build Coastguard Worker if (score == 0.0) {
365*61046927SAndroid Build Coastguard Worker if (type == RegType::sgpr)
366*61046927SAndroid Build Coastguard Worker break;
367*61046927SAndroid Build Coastguard Worker type = RegType::sgpr;
368*61046927SAndroid Build Coastguard Worker continue;
369*61046927SAndroid Build Coastguard Worker }
370*61046927SAndroid Build Coastguard Worker
371*61046927SAndroid Build Coastguard Worker ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
372*61046927SAndroid Build Coastguard Worker spilled_registers += to_spill;
373*61046927SAndroid Build Coastguard Worker loop_demand -= to_spill;
374*61046927SAndroid Build Coastguard Worker }
375*61046927SAndroid Build Coastguard Worker
376*61046927SAndroid Build Coastguard Worker /* create new loop_info */
377*61046927SAndroid Build Coastguard Worker loop_info info = {block_idx, ctx.spills_entry[block_idx], live_in};
378*61046927SAndroid Build Coastguard Worker ctx.loop.emplace_back(std::move(info));
379*61046927SAndroid Build Coastguard Worker
380*61046927SAndroid Build Coastguard Worker /* shortcut */
381*61046927SAndroid Build Coastguard Worker if (!loop_demand.exceeds(ctx.target_pressure))
382*61046927SAndroid Build Coastguard Worker return spilled_registers;
383*61046927SAndroid Build Coastguard Worker
384*61046927SAndroid Build Coastguard Worker /* if reg pressure is too high at beginning of loop, add variables with furthest use */
385*61046927SAndroid Build Coastguard Worker reg_pressure -= spilled_registers;
386*61046927SAndroid Build Coastguard Worker
387*61046927SAndroid Build Coastguard Worker while (reg_pressure.exceeds(ctx.target_pressure)) {
388*61046927SAndroid Build Coastguard Worker float score = 0;
389*61046927SAndroid Build Coastguard Worker Temp to_spill;
390*61046927SAndroid Build Coastguard Worker type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr;
391*61046927SAndroid Build Coastguard Worker for (aco_ptr<Instruction>& phi : block->instructions) {
392*61046927SAndroid Build Coastguard Worker if (!is_phi(phi))
393*61046927SAndroid Build Coastguard Worker break;
394*61046927SAndroid Build Coastguard Worker if (!phi->definitions[0].isTemp() || phi->definitions[0].isKill())
395*61046927SAndroid Build Coastguard Worker continue;
396*61046927SAndroid Build Coastguard Worker Temp var = phi->definitions[0].getTemp();
397*61046927SAndroid Build Coastguard Worker if (var.type() == type && !ctx.spills_entry[block_idx].count(var) &&
398*61046927SAndroid Build Coastguard Worker ctx.ssa_infos[var.id()].score() > score) {
399*61046927SAndroid Build Coastguard Worker to_spill = var;
400*61046927SAndroid Build Coastguard Worker score = ctx.ssa_infos[var.id()].score();
401*61046927SAndroid Build Coastguard Worker }
402*61046927SAndroid Build Coastguard Worker }
403*61046927SAndroid Build Coastguard Worker assert(score != 0.0);
404*61046927SAndroid Build Coastguard Worker ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
405*61046927SAndroid Build Coastguard Worker spilled_registers += to_spill;
406*61046927SAndroid Build Coastguard Worker reg_pressure -= to_spill;
407*61046927SAndroid Build Coastguard Worker }
408*61046927SAndroid Build Coastguard Worker
409*61046927SAndroid Build Coastguard Worker return spilled_registers;
410*61046927SAndroid Build Coastguard Worker }
411*61046927SAndroid Build Coastguard Worker
412*61046927SAndroid Build Coastguard Worker /* branch block */
413*61046927SAndroid Build Coastguard Worker if (block->linear_preds.size() == 1 && !(block->kind & block_kind_loop_exit)) {
414*61046927SAndroid Build Coastguard Worker /* keep variables spilled */
415*61046927SAndroid Build Coastguard Worker unsigned pred_idx = block->linear_preds[0];
416*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
417*61046927SAndroid Build Coastguard Worker if (pair.first.type() != RegType::sgpr)
418*61046927SAndroid Build Coastguard Worker continue;
419*61046927SAndroid Build Coastguard Worker
420*61046927SAndroid Build Coastguard Worker if (live_in.count(pair.first.id())) {
421*61046927SAndroid Build Coastguard Worker spilled_registers += pair.first;
422*61046927SAndroid Build Coastguard Worker ctx.spills_entry[block_idx].emplace(pair);
423*61046927SAndroid Build Coastguard Worker }
424*61046927SAndroid Build Coastguard Worker }
425*61046927SAndroid Build Coastguard Worker
426*61046927SAndroid Build Coastguard Worker if (block->logical_preds.empty())
427*61046927SAndroid Build Coastguard Worker return spilled_registers;
428*61046927SAndroid Build Coastguard Worker
429*61046927SAndroid Build Coastguard Worker pred_idx = block->logical_preds[0];
430*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
431*61046927SAndroid Build Coastguard Worker if (pair.first.type() != RegType::vgpr)
432*61046927SAndroid Build Coastguard Worker continue;
433*61046927SAndroid Build Coastguard Worker
434*61046927SAndroid Build Coastguard Worker if (live_in.count(pair.first.id())) {
435*61046927SAndroid Build Coastguard Worker spilled_registers += pair.first;
436*61046927SAndroid Build Coastguard Worker ctx.spills_entry[block_idx].emplace(pair);
437*61046927SAndroid Build Coastguard Worker }
438*61046927SAndroid Build Coastguard Worker }
439*61046927SAndroid Build Coastguard Worker
440*61046927SAndroid Build Coastguard Worker return spilled_registers;
441*61046927SAndroid Build Coastguard Worker }
442*61046927SAndroid Build Coastguard Worker
443*61046927SAndroid Build Coastguard Worker /* else: merge block */
444*61046927SAndroid Build Coastguard Worker std::map<Temp, bool> partial_spills;
445*61046927SAndroid Build Coastguard Worker
446*61046927SAndroid Build Coastguard Worker /* keep variables spilled on all incoming paths */
447*61046927SAndroid Build Coastguard Worker for (unsigned t : live_in) {
448*61046927SAndroid Build Coastguard Worker const RegClass rc = ctx.program->temp_rc[t];
449*61046927SAndroid Build Coastguard Worker Temp var = Temp(t, rc);
450*61046927SAndroid Build Coastguard Worker Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
451*61046927SAndroid Build Coastguard Worker
452*61046927SAndroid Build Coastguard Worker /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload
453*61046927SAndroid Build Coastguard Worker * it. Otherwise, if any predecessor reloads it, ensure it's reloaded on all other
454*61046927SAndroid Build Coastguard Worker * predecessors. The idea is that it's better in practice to rematerialize redundantly than to
455*61046927SAndroid Build Coastguard Worker * create lots of phis. */
456*61046927SAndroid Build Coastguard Worker const bool remat = ctx.remat.count(var);
457*61046927SAndroid Build Coastguard Worker /* If the variable is spilled at the current loop-header, spilling is essentially for free
458*61046927SAndroid Build Coastguard Worker * while reloading is not. Thus, keep them spilled if they are at least partially spilled.
459*61046927SAndroid Build Coastguard Worker */
460*61046927SAndroid Build Coastguard Worker const bool avoid_respill = block->loop_nest_depth && ctx.loop.back().spills.count(var);
461*61046927SAndroid Build Coastguard Worker bool spill = true;
462*61046927SAndroid Build Coastguard Worker bool partial_spill = false;
463*61046927SAndroid Build Coastguard Worker uint32_t spill_id = 0;
464*61046927SAndroid Build Coastguard Worker for (unsigned pred_idx : preds) {
465*61046927SAndroid Build Coastguard Worker if (!ctx.spills_exit[pred_idx].count(var)) {
466*61046927SAndroid Build Coastguard Worker spill = false;
467*61046927SAndroid Build Coastguard Worker } else {
468*61046927SAndroid Build Coastguard Worker partial_spill = true;
469*61046927SAndroid Build Coastguard Worker /* it might be that on one incoming path, the variable has a different spill_id, but
470*61046927SAndroid Build Coastguard Worker * add_coupling_code() will take care of that. */
471*61046927SAndroid Build Coastguard Worker spill_id = ctx.spills_exit[pred_idx][var];
472*61046927SAndroid Build Coastguard Worker }
473*61046927SAndroid Build Coastguard Worker }
474*61046927SAndroid Build Coastguard Worker spill |= (remat && partial_spill);
475*61046927SAndroid Build Coastguard Worker spill |= (avoid_respill && partial_spill);
476*61046927SAndroid Build Coastguard Worker if (spill) {
477*61046927SAndroid Build Coastguard Worker ctx.spills_entry[block_idx][var] = spill_id;
478*61046927SAndroid Build Coastguard Worker partial_spills.erase(var);
479*61046927SAndroid Build Coastguard Worker spilled_registers += var;
480*61046927SAndroid Build Coastguard Worker } else {
481*61046927SAndroid Build Coastguard Worker partial_spills[var] = partial_spill;
482*61046927SAndroid Build Coastguard Worker }
483*61046927SAndroid Build Coastguard Worker }
484*61046927SAndroid Build Coastguard Worker
485*61046927SAndroid Build Coastguard Worker /* same for phis */
486*61046927SAndroid Build Coastguard Worker for (aco_ptr<Instruction>& phi : block->instructions) {
487*61046927SAndroid Build Coastguard Worker if (!is_phi(phi))
488*61046927SAndroid Build Coastguard Worker break;
489*61046927SAndroid Build Coastguard Worker if (!phi->definitions[0].isTemp() || phi->definitions[0].isKill())
490*61046927SAndroid Build Coastguard Worker continue;
491*61046927SAndroid Build Coastguard Worker
492*61046927SAndroid Build Coastguard Worker Block::edge_vec& preds =
493*61046927SAndroid Build Coastguard Worker phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
494*61046927SAndroid Build Coastguard Worker bool is_all_undef = true;
495*61046927SAndroid Build Coastguard Worker bool is_all_spilled = true;
496*61046927SAndroid Build Coastguard Worker bool is_partial_spill = false;
497*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < phi->operands.size(); i++) {
498*61046927SAndroid Build Coastguard Worker if (phi->operands[i].isUndefined())
499*61046927SAndroid Build Coastguard Worker continue;
500*61046927SAndroid Build Coastguard Worker bool spilled = phi->operands[i].isTemp() &&
501*61046927SAndroid Build Coastguard Worker ctx.spills_exit[preds[i]].count(phi->operands[i].getTemp());
502*61046927SAndroid Build Coastguard Worker is_all_spilled &= spilled;
503*61046927SAndroid Build Coastguard Worker is_partial_spill |= spilled;
504*61046927SAndroid Build Coastguard Worker is_all_undef = false;
505*61046927SAndroid Build Coastguard Worker }
506*61046927SAndroid Build Coastguard Worker
507*61046927SAndroid Build Coastguard Worker if (is_all_spilled && !is_all_undef) {
508*61046927SAndroid Build Coastguard Worker /* The phi is spilled at all predecessors. Keep it spilled. */
509*61046927SAndroid Build Coastguard Worker ctx.add_to_spills(phi->definitions[0].getTemp(), ctx.spills_entry[block_idx]);
510*61046927SAndroid Build Coastguard Worker spilled_registers += phi->definitions[0].getTemp();
511*61046927SAndroid Build Coastguard Worker partial_spills.erase(phi->definitions[0].getTemp());
512*61046927SAndroid Build Coastguard Worker } else {
513*61046927SAndroid Build Coastguard Worker /* Phis might increase the register pressure. */
514*61046927SAndroid Build Coastguard Worker partial_spills[phi->definitions[0].getTemp()] = is_partial_spill;
515*61046927SAndroid Build Coastguard Worker }
516*61046927SAndroid Build Coastguard Worker }
517*61046927SAndroid Build Coastguard Worker
518*61046927SAndroid Build Coastguard Worker /* if reg pressure at first instruction is still too high, add partially spilled variables */
519*61046927SAndroid Build Coastguard Worker RegisterDemand reg_pressure = block->live_in_demand;
520*61046927SAndroid Build Coastguard Worker reg_pressure -= spilled_registers;
521*61046927SAndroid Build Coastguard Worker
522*61046927SAndroid Build Coastguard Worker while (reg_pressure.exceeds(ctx.target_pressure)) {
523*61046927SAndroid Build Coastguard Worker assert(!partial_spills.empty());
524*61046927SAndroid Build Coastguard Worker std::map<Temp, bool>::iterator it = partial_spills.begin();
525*61046927SAndroid Build Coastguard Worker Temp to_spill = Temp();
526*61046927SAndroid Build Coastguard Worker bool is_partial_spill = false;
527*61046927SAndroid Build Coastguard Worker float score = 0.0;
528*61046927SAndroid Build Coastguard Worker RegType type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr;
529*61046927SAndroid Build Coastguard Worker
530*61046927SAndroid Build Coastguard Worker while (it != partial_spills.end()) {
531*61046927SAndroid Build Coastguard Worker assert(!ctx.spills_entry[block_idx].count(it->first));
532*61046927SAndroid Build Coastguard Worker
533*61046927SAndroid Build Coastguard Worker if (it->first.type() == type && !it->first.regClass().is_linear_vgpr() &&
534*61046927SAndroid Build Coastguard Worker ((it->second && !is_partial_spill) ||
535*61046927SAndroid Build Coastguard Worker (it->second == is_partial_spill && ctx.ssa_infos[it->first.id()].score() > score))) {
536*61046927SAndroid Build Coastguard Worker score = ctx.ssa_infos[it->first.id()].score();
537*61046927SAndroid Build Coastguard Worker to_spill = it->first;
538*61046927SAndroid Build Coastguard Worker is_partial_spill = it->second;
539*61046927SAndroid Build Coastguard Worker }
540*61046927SAndroid Build Coastguard Worker ++it;
541*61046927SAndroid Build Coastguard Worker }
542*61046927SAndroid Build Coastguard Worker assert(score != 0.0);
543*61046927SAndroid Build Coastguard Worker ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
544*61046927SAndroid Build Coastguard Worker partial_spills.erase(to_spill);
545*61046927SAndroid Build Coastguard Worker spilled_registers += to_spill;
546*61046927SAndroid Build Coastguard Worker reg_pressure -= to_spill;
547*61046927SAndroid Build Coastguard Worker }
548*61046927SAndroid Build Coastguard Worker
549*61046927SAndroid Build Coastguard Worker return spilled_registers;
550*61046927SAndroid Build Coastguard Worker }
551*61046927SAndroid Build Coastguard Worker
552*61046927SAndroid Build Coastguard Worker void
add_coupling_code(spill_ctx & ctx,Block * block,IDSet & live_in)553*61046927SAndroid Build Coastguard Worker add_coupling_code(spill_ctx& ctx, Block* block, IDSet& live_in)
554*61046927SAndroid Build Coastguard Worker {
555*61046927SAndroid Build Coastguard Worker const unsigned block_idx = block->index;
556*61046927SAndroid Build Coastguard Worker /* No coupling code necessary */
557*61046927SAndroid Build Coastguard Worker if (block->linear_preds.size() == 0)
558*61046927SAndroid Build Coastguard Worker return;
559*61046927SAndroid Build Coastguard Worker
560*61046927SAndroid Build Coastguard Worker /* Branch block: update renames */
561*61046927SAndroid Build Coastguard Worker if (block->linear_preds.size() == 1 &&
562*61046927SAndroid Build Coastguard Worker !(block->kind & (block_kind_loop_exit | block_kind_loop_header))) {
563*61046927SAndroid Build Coastguard Worker assert(ctx.processed[block->linear_preds[0]]);
564*61046927SAndroid Build Coastguard Worker
565*61046927SAndroid Build Coastguard Worker ctx.renames[block_idx] = ctx.renames[block->linear_preds[0]];
566*61046927SAndroid Build Coastguard Worker if (!block->logical_preds.empty() && block->logical_preds[0] != block->linear_preds[0]) {
567*61046927SAndroid Build Coastguard Worker for (auto it : ctx.renames[block->logical_preds[0]]) {
568*61046927SAndroid Build Coastguard Worker if (it.first.type() == RegType::vgpr)
569*61046927SAndroid Build Coastguard Worker ctx.renames[block_idx].insert_or_assign(it.first, it.second);
570*61046927SAndroid Build Coastguard Worker }
571*61046927SAndroid Build Coastguard Worker }
572*61046927SAndroid Build Coastguard Worker return;
573*61046927SAndroid Build Coastguard Worker }
574*61046927SAndroid Build Coastguard Worker
575*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>> instructions;
576*61046927SAndroid Build Coastguard Worker
577*61046927SAndroid Build Coastguard Worker /* loop header and merge blocks: check if all (linear) predecessors have been processed */
578*61046927SAndroid Build Coastguard Worker for (ASSERTED unsigned pred : block->linear_preds)
579*61046927SAndroid Build Coastguard Worker assert(ctx.processed[pred]);
580*61046927SAndroid Build Coastguard Worker
581*61046927SAndroid Build Coastguard Worker /* iterate the phi nodes for which operands to spill at the predecessor */
582*61046927SAndroid Build Coastguard Worker for (aco_ptr<Instruction>& phi : block->instructions) {
583*61046927SAndroid Build Coastguard Worker if (!is_phi(phi))
584*61046927SAndroid Build Coastguard Worker break;
585*61046927SAndroid Build Coastguard Worker
586*61046927SAndroid Build Coastguard Worker for (const Operand& op : phi->operands) {
587*61046927SAndroid Build Coastguard Worker if (op.isTemp())
588*61046927SAndroid Build Coastguard Worker ctx.ssa_infos[op.tempId()].num_uses--;
589*61046927SAndroid Build Coastguard Worker }
590*61046927SAndroid Build Coastguard Worker
591*61046927SAndroid Build Coastguard Worker /* The phi is not spilled */
592*61046927SAndroid Build Coastguard Worker if (!phi->definitions[0].isTemp() ||
593*61046927SAndroid Build Coastguard Worker !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()))
594*61046927SAndroid Build Coastguard Worker continue;
595*61046927SAndroid Build Coastguard Worker
596*61046927SAndroid Build Coastguard Worker Block::edge_vec& preds =
597*61046927SAndroid Build Coastguard Worker phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
598*61046927SAndroid Build Coastguard Worker uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()];
599*61046927SAndroid Build Coastguard Worker phi->definitions[0].setKill(true);
600*61046927SAndroid Build Coastguard Worker
601*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < phi->operands.size(); i++) {
602*61046927SAndroid Build Coastguard Worker if (phi->operands[i].isUndefined())
603*61046927SAndroid Build Coastguard Worker continue;
604*61046927SAndroid Build Coastguard Worker
605*61046927SAndroid Build Coastguard Worker unsigned pred_idx = preds[i];
606*61046927SAndroid Build Coastguard Worker Operand spill_op = phi->operands[i];
607*61046927SAndroid Build Coastguard Worker phi->operands[i] = Operand(phi->definitions[0].regClass());
608*61046927SAndroid Build Coastguard Worker
609*61046927SAndroid Build Coastguard Worker if (spill_op.isTemp()) {
610*61046927SAndroid Build Coastguard Worker assert(spill_op.isKill());
611*61046927SAndroid Build Coastguard Worker Temp var = spill_op.getTemp();
612*61046927SAndroid Build Coastguard Worker
613*61046927SAndroid Build Coastguard Worker std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
614*61046927SAndroid Build Coastguard Worker /* prevent the defining instruction from being DCE'd if it could be rematerialized */
615*61046927SAndroid Build Coastguard Worker if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var))
616*61046927SAndroid Build Coastguard Worker ctx.unused_remats.erase(ctx.remat[var].instr);
617*61046927SAndroid Build Coastguard Worker
618*61046927SAndroid Build Coastguard Worker /* check if variable is already spilled at predecessor */
619*61046927SAndroid Build Coastguard Worker auto spilled = ctx.spills_exit[pred_idx].find(var);
620*61046927SAndroid Build Coastguard Worker if (spilled != ctx.spills_exit[pred_idx].end()) {
621*61046927SAndroid Build Coastguard Worker if (spilled->second != def_spill_id)
622*61046927SAndroid Build Coastguard Worker ctx.add_affinity(def_spill_id, spilled->second);
623*61046927SAndroid Build Coastguard Worker continue;
624*61046927SAndroid Build Coastguard Worker }
625*61046927SAndroid Build Coastguard Worker
626*61046927SAndroid Build Coastguard Worker /* If the phi operand has the same name as the definition,
627*61046927SAndroid Build Coastguard Worker * add to predecessor's spilled variables, so that it gets
628*61046927SAndroid Build Coastguard Worker * skipped in the loop below.
629*61046927SAndroid Build Coastguard Worker */
630*61046927SAndroid Build Coastguard Worker if (var == phi->definitions[0].getTemp())
631*61046927SAndroid Build Coastguard Worker ctx.spills_exit[pred_idx][var] = def_spill_id;
632*61046927SAndroid Build Coastguard Worker
633*61046927SAndroid Build Coastguard Worker /* rename if necessary */
634*61046927SAndroid Build Coastguard Worker if (rename_it != ctx.renames[pred_idx].end()) {
635*61046927SAndroid Build Coastguard Worker spill_op.setTemp(rename_it->second);
636*61046927SAndroid Build Coastguard Worker ctx.renames[pred_idx].erase(rename_it);
637*61046927SAndroid Build Coastguard Worker }
638*61046927SAndroid Build Coastguard Worker }
639*61046927SAndroid Build Coastguard Worker
640*61046927SAndroid Build Coastguard Worker /* add interferences */
641*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx])
642*61046927SAndroid Build Coastguard Worker ctx.add_interference(def_spill_id, pair.second);
643*61046927SAndroid Build Coastguard Worker
644*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> spill{create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
645*61046927SAndroid Build Coastguard Worker spill->operands[0] = spill_op;
646*61046927SAndroid Build Coastguard Worker spill->operands[1] = Operand::c32(def_spill_id);
647*61046927SAndroid Build Coastguard Worker Block& pred = ctx.program->blocks[pred_idx];
648*61046927SAndroid Build Coastguard Worker unsigned idx = pred.instructions.size();
649*61046927SAndroid Build Coastguard Worker do {
650*61046927SAndroid Build Coastguard Worker assert(idx != 0);
651*61046927SAndroid Build Coastguard Worker idx--;
652*61046927SAndroid Build Coastguard Worker } while (phi->opcode == aco_opcode::p_phi &&
653*61046927SAndroid Build Coastguard Worker pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
654*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
655*61046927SAndroid Build Coastguard Worker pred.instructions.insert(it, std::move(spill));
656*61046927SAndroid Build Coastguard Worker }
657*61046927SAndroid Build Coastguard Worker }
658*61046927SAndroid Build Coastguard Worker
659*61046927SAndroid Build Coastguard Worker /* iterate all (other) spilled variables for which to spill at the predecessor */
660*61046927SAndroid Build Coastguard Worker // TODO: would be better to have them sorted: first vgprs and first with longest distance
661*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) {
662*61046927SAndroid Build Coastguard Worker /* if variable is not live-in, it must be from a phi: this works because of CSSA form */
663*61046927SAndroid Build Coastguard Worker if (!live_in.count(pair.first.id()))
664*61046927SAndroid Build Coastguard Worker continue;
665*61046927SAndroid Build Coastguard Worker
666*61046927SAndroid Build Coastguard Worker Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds;
667*61046927SAndroid Build Coastguard Worker for (unsigned pred_idx : preds) {
668*61046927SAndroid Build Coastguard Worker /* variable is already spilled at predecessor */
669*61046927SAndroid Build Coastguard Worker auto spilled = ctx.spills_exit[pred_idx].find(pair.first);
670*61046927SAndroid Build Coastguard Worker if (spilled != ctx.spills_exit[pred_idx].end()) {
671*61046927SAndroid Build Coastguard Worker if (spilled->second != pair.second)
672*61046927SAndroid Build Coastguard Worker ctx.add_affinity(pair.second, spilled->second);
673*61046927SAndroid Build Coastguard Worker continue;
674*61046927SAndroid Build Coastguard Worker }
675*61046927SAndroid Build Coastguard Worker
676*61046927SAndroid Build Coastguard Worker /* If this variable is spilled through the entire loop, no need to re-spill.
677*61046927SAndroid Build Coastguard Worker * It can be reloaded from the same spill-slot it got at the loop-preheader.
678*61046927SAndroid Build Coastguard Worker * No need to add interferences since every spilled variable in the loop already
679*61046927SAndroid Build Coastguard Worker * interferes with the spilled loop-variables. Make sure that the spill_ids match.
680*61046927SAndroid Build Coastguard Worker */
681*61046927SAndroid Build Coastguard Worker const uint32_t loop_nest_depth = std::min(ctx.program->blocks[pred_idx].loop_nest_depth,
682*61046927SAndroid Build Coastguard Worker ctx.program->blocks[block_idx].loop_nest_depth);
683*61046927SAndroid Build Coastguard Worker if (loop_nest_depth) {
684*61046927SAndroid Build Coastguard Worker auto spill = ctx.loop[loop_nest_depth - 1].spills.find(pair.first);
685*61046927SAndroid Build Coastguard Worker if (spill != ctx.loop[loop_nest_depth - 1].spills.end() && spill->second == pair.second)
686*61046927SAndroid Build Coastguard Worker continue;
687*61046927SAndroid Build Coastguard Worker }
688*61046927SAndroid Build Coastguard Worker
689*61046927SAndroid Build Coastguard Worker /* add interferences between spilled variable and predecessors exit spills */
690*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx])
691*61046927SAndroid Build Coastguard Worker ctx.add_interference(exit_spill.second, pair.second);
692*61046927SAndroid Build Coastguard Worker
693*61046927SAndroid Build Coastguard Worker /* variable is in register at predecessor and has to be spilled */
694*61046927SAndroid Build Coastguard Worker /* rename if necessary */
695*61046927SAndroid Build Coastguard Worker Temp var = pair.first;
696*61046927SAndroid Build Coastguard Worker std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
697*61046927SAndroid Build Coastguard Worker if (rename_it != ctx.renames[pred_idx].end()) {
698*61046927SAndroid Build Coastguard Worker var = rename_it->second;
699*61046927SAndroid Build Coastguard Worker ctx.renames[pred_idx].erase(rename_it);
700*61046927SAndroid Build Coastguard Worker }
701*61046927SAndroid Build Coastguard Worker
702*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> spill{create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
703*61046927SAndroid Build Coastguard Worker spill->operands[0] = Operand(var);
704*61046927SAndroid Build Coastguard Worker spill->operands[1] = Operand::c32(pair.second);
705*61046927SAndroid Build Coastguard Worker Block& pred = ctx.program->blocks[pred_idx];
706*61046927SAndroid Build Coastguard Worker unsigned idx = pred.instructions.size();
707*61046927SAndroid Build Coastguard Worker do {
708*61046927SAndroid Build Coastguard Worker assert(idx != 0);
709*61046927SAndroid Build Coastguard Worker idx--;
710*61046927SAndroid Build Coastguard Worker } while (pair.first.type() == RegType::vgpr &&
711*61046927SAndroid Build Coastguard Worker pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
712*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
713*61046927SAndroid Build Coastguard Worker pred.instructions.insert(it, std::move(spill));
714*61046927SAndroid Build Coastguard Worker }
715*61046927SAndroid Build Coastguard Worker }
716*61046927SAndroid Build Coastguard Worker
717*61046927SAndroid Build Coastguard Worker /* iterate phis for which operands to reload */
718*61046927SAndroid Build Coastguard Worker for (aco_ptr<Instruction>& phi : block->instructions) {
719*61046927SAndroid Build Coastguard Worker if (!is_phi(phi))
720*61046927SAndroid Build Coastguard Worker break;
721*61046927SAndroid Build Coastguard Worker if (phi->definitions[0].isKill())
722*61046927SAndroid Build Coastguard Worker continue;
723*61046927SAndroid Build Coastguard Worker
724*61046927SAndroid Build Coastguard Worker assert(!phi->definitions[0].isTemp() ||
725*61046927SAndroid Build Coastguard Worker !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()));
726*61046927SAndroid Build Coastguard Worker
727*61046927SAndroid Build Coastguard Worker Block::edge_vec& preds =
728*61046927SAndroid Build Coastguard Worker phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
729*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < phi->operands.size(); i++) {
730*61046927SAndroid Build Coastguard Worker if (!phi->operands[i].isTemp())
731*61046927SAndroid Build Coastguard Worker continue;
732*61046927SAndroid Build Coastguard Worker unsigned pred_idx = preds[i];
733*61046927SAndroid Build Coastguard Worker
734*61046927SAndroid Build Coastguard Worker /* if the operand was reloaded, rename */
735*61046927SAndroid Build Coastguard Worker if (!ctx.spills_exit[pred_idx].count(phi->operands[i].getTemp())) {
736*61046927SAndroid Build Coastguard Worker std::map<Temp, Temp>::iterator it =
737*61046927SAndroid Build Coastguard Worker ctx.renames[pred_idx].find(phi->operands[i].getTemp());
738*61046927SAndroid Build Coastguard Worker if (it != ctx.renames[pred_idx].end()) {
739*61046927SAndroid Build Coastguard Worker phi->operands[i].setTemp(it->second);
740*61046927SAndroid Build Coastguard Worker /* prevent the defining instruction from being DCE'd if it could be rematerialized */
741*61046927SAndroid Build Coastguard Worker } else {
742*61046927SAndroid Build Coastguard Worker auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
743*61046927SAndroid Build Coastguard Worker if (remat_it != ctx.remat.end()) {
744*61046927SAndroid Build Coastguard Worker ctx.unused_remats.erase(remat_it->second.instr);
745*61046927SAndroid Build Coastguard Worker }
746*61046927SAndroid Build Coastguard Worker }
747*61046927SAndroid Build Coastguard Worker continue;
748*61046927SAndroid Build Coastguard Worker }
749*61046927SAndroid Build Coastguard Worker
750*61046927SAndroid Build Coastguard Worker Temp tmp = phi->operands[i].getTemp();
751*61046927SAndroid Build Coastguard Worker
752*61046927SAndroid Build Coastguard Worker /* reload phi operand at end of predecessor block */
753*61046927SAndroid Build Coastguard Worker Temp new_name = ctx.program->allocateTmp(tmp.regClass());
754*61046927SAndroid Build Coastguard Worker Block& pred = ctx.program->blocks[pred_idx];
755*61046927SAndroid Build Coastguard Worker unsigned idx = pred.instructions.size();
756*61046927SAndroid Build Coastguard Worker do {
757*61046927SAndroid Build Coastguard Worker assert(idx != 0);
758*61046927SAndroid Build Coastguard Worker idx--;
759*61046927SAndroid Build Coastguard Worker } while (phi->opcode == aco_opcode::p_phi &&
760*61046927SAndroid Build Coastguard Worker pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
761*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
762*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> reload =
763*61046927SAndroid Build Coastguard Worker do_reload(ctx, tmp, new_name, ctx.spills_exit[pred_idx][tmp]);
764*61046927SAndroid Build Coastguard Worker
765*61046927SAndroid Build Coastguard Worker /* reload spilled exec mask directly to exec */
766*61046927SAndroid Build Coastguard Worker if (!phi->definitions[0].isTemp()) {
767*61046927SAndroid Build Coastguard Worker assert(phi->definitions[0].isFixed() && phi->definitions[0].physReg() == exec);
768*61046927SAndroid Build Coastguard Worker reload->definitions[0] = phi->definitions[0];
769*61046927SAndroid Build Coastguard Worker phi->operands[i] = Operand(exec, ctx.program->lane_mask);
770*61046927SAndroid Build Coastguard Worker } else {
771*61046927SAndroid Build Coastguard Worker ctx.spills_exit[pred_idx].erase(tmp);
772*61046927SAndroid Build Coastguard Worker ctx.renames[pred_idx][tmp] = new_name;
773*61046927SAndroid Build Coastguard Worker phi->operands[i].setTemp(new_name);
774*61046927SAndroid Build Coastguard Worker }
775*61046927SAndroid Build Coastguard Worker
776*61046927SAndroid Build Coastguard Worker pred.instructions.insert(it, std::move(reload));
777*61046927SAndroid Build Coastguard Worker }
778*61046927SAndroid Build Coastguard Worker }
779*61046927SAndroid Build Coastguard Worker
780*61046927SAndroid Build Coastguard Worker /* iterate live variables for which to reload */
781*61046927SAndroid Build Coastguard Worker for (unsigned t : live_in) {
782*61046927SAndroid Build Coastguard Worker const RegClass rc = ctx.program->temp_rc[t];
783*61046927SAndroid Build Coastguard Worker Temp var = Temp(t, rc);
784*61046927SAndroid Build Coastguard Worker
785*61046927SAndroid Build Coastguard Worker /* skip spilled variables */
786*61046927SAndroid Build Coastguard Worker if (ctx.spills_entry[block_idx].count(var))
787*61046927SAndroid Build Coastguard Worker continue;
788*61046927SAndroid Build Coastguard Worker
789*61046927SAndroid Build Coastguard Worker Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
790*61046927SAndroid Build Coastguard Worker for (unsigned pred_idx : preds) {
791*61046927SAndroid Build Coastguard Worker /* skip if the variable is not spilled at the predecessor */
792*61046927SAndroid Build Coastguard Worker if (!ctx.spills_exit[pred_idx].count(var))
793*61046927SAndroid Build Coastguard Worker continue;
794*61046927SAndroid Build Coastguard Worker
795*61046927SAndroid Build Coastguard Worker /* variable is spilled at predecessor and has to be reloaded */
796*61046927SAndroid Build Coastguard Worker Temp new_name = ctx.program->allocateTmp(rc);
797*61046927SAndroid Build Coastguard Worker Block& pred = ctx.program->blocks[pred_idx];
798*61046927SAndroid Build Coastguard Worker unsigned idx = pred.instructions.size();
799*61046927SAndroid Build Coastguard Worker do {
800*61046927SAndroid Build Coastguard Worker assert(idx != 0);
801*61046927SAndroid Build Coastguard Worker idx--;
802*61046927SAndroid Build Coastguard Worker } while (rc.type() == RegType::vgpr &&
803*61046927SAndroid Build Coastguard Worker pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
804*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
805*61046927SAndroid Build Coastguard Worker
806*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> reload =
807*61046927SAndroid Build Coastguard Worker do_reload(ctx, var, new_name, ctx.spills_exit[pred.index][var]);
808*61046927SAndroid Build Coastguard Worker pred.instructions.insert(it, std::move(reload));
809*61046927SAndroid Build Coastguard Worker
810*61046927SAndroid Build Coastguard Worker ctx.spills_exit[pred.index].erase(var);
811*61046927SAndroid Build Coastguard Worker ctx.renames[pred.index][var] = new_name;
812*61046927SAndroid Build Coastguard Worker }
813*61046927SAndroid Build Coastguard Worker
814*61046927SAndroid Build Coastguard Worker /* check if we have to create a new phi for this variable */
815*61046927SAndroid Build Coastguard Worker Temp rename = Temp();
816*61046927SAndroid Build Coastguard Worker bool is_same = true;
817*61046927SAndroid Build Coastguard Worker for (unsigned pred_idx : preds) {
818*61046927SAndroid Build Coastguard Worker if (!ctx.renames[pred_idx].count(var)) {
819*61046927SAndroid Build Coastguard Worker if (rename == Temp())
820*61046927SAndroid Build Coastguard Worker rename = var;
821*61046927SAndroid Build Coastguard Worker else
822*61046927SAndroid Build Coastguard Worker is_same = rename == var;
823*61046927SAndroid Build Coastguard Worker } else {
824*61046927SAndroid Build Coastguard Worker if (rename == Temp())
825*61046927SAndroid Build Coastguard Worker rename = ctx.renames[pred_idx][var];
826*61046927SAndroid Build Coastguard Worker else
827*61046927SAndroid Build Coastguard Worker is_same = rename == ctx.renames[pred_idx][var];
828*61046927SAndroid Build Coastguard Worker }
829*61046927SAndroid Build Coastguard Worker
830*61046927SAndroid Build Coastguard Worker if (!is_same)
831*61046927SAndroid Build Coastguard Worker break;
832*61046927SAndroid Build Coastguard Worker }
833*61046927SAndroid Build Coastguard Worker
834*61046927SAndroid Build Coastguard Worker if (!is_same) {
835*61046927SAndroid Build Coastguard Worker /* the variable was renamed differently in the predecessors: we have to create a phi */
836*61046927SAndroid Build Coastguard Worker aco_opcode opcode = rc.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
837*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> phi{create_instruction(opcode, Format::PSEUDO, preds.size(), 1)};
838*61046927SAndroid Build Coastguard Worker rename = ctx.program->allocateTmp(rc);
839*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < phi->operands.size(); i++) {
840*61046927SAndroid Build Coastguard Worker Temp tmp;
841*61046927SAndroid Build Coastguard Worker if (ctx.renames[preds[i]].count(var)) {
842*61046927SAndroid Build Coastguard Worker tmp = ctx.renames[preds[i]][var];
843*61046927SAndroid Build Coastguard Worker } else if (preds[i] >= block_idx) {
844*61046927SAndroid Build Coastguard Worker tmp = rename;
845*61046927SAndroid Build Coastguard Worker } else {
846*61046927SAndroid Build Coastguard Worker tmp = var;
847*61046927SAndroid Build Coastguard Worker /* prevent the defining instruction from being DCE'd if it could be rematerialized */
848*61046927SAndroid Build Coastguard Worker if (ctx.remat.count(tmp))
849*61046927SAndroid Build Coastguard Worker ctx.unused_remats.erase(ctx.remat[tmp].instr);
850*61046927SAndroid Build Coastguard Worker }
851*61046927SAndroid Build Coastguard Worker phi->operands[i] = Operand(tmp);
852*61046927SAndroid Build Coastguard Worker }
853*61046927SAndroid Build Coastguard Worker phi->definitions[0] = Definition(rename);
854*61046927SAndroid Build Coastguard Worker phi->register_demand = block->live_in_demand;
855*61046927SAndroid Build Coastguard Worker block->instructions.insert(block->instructions.begin(), std::move(phi));
856*61046927SAndroid Build Coastguard Worker }
857*61046927SAndroid Build Coastguard Worker
858*61046927SAndroid Build Coastguard Worker /* the variable was renamed: add new name to renames */
859*61046927SAndroid Build Coastguard Worker if (!(rename == Temp() || rename == var))
860*61046927SAndroid Build Coastguard Worker ctx.renames[block_idx][var] = rename;
861*61046927SAndroid Build Coastguard Worker }
862*61046927SAndroid Build Coastguard Worker }
863*61046927SAndroid Build Coastguard Worker
864*61046927SAndroid Build Coastguard Worker void
process_block(spill_ctx & ctx,unsigned block_idx,Block * block,RegisterDemand spilled_registers)865*61046927SAndroid Build Coastguard Worker process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand spilled_registers)
866*61046927SAndroid Build Coastguard Worker {
867*61046927SAndroid Build Coastguard Worker assert(!ctx.processed[block_idx]);
868*61046927SAndroid Build Coastguard Worker
869*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>> instructions;
870*61046927SAndroid Build Coastguard Worker unsigned idx = 0;
871*61046927SAndroid Build Coastguard Worker
872*61046927SAndroid Build Coastguard Worker /* phis are handled separately */
873*61046927SAndroid Build Coastguard Worker while (block->instructions[idx]->opcode == aco_opcode::p_phi ||
874*61046927SAndroid Build Coastguard Worker block->instructions[idx]->opcode == aco_opcode::p_linear_phi) {
875*61046927SAndroid Build Coastguard Worker const Definition def = block->instructions[idx]->definitions[0];
876*61046927SAndroid Build Coastguard Worker if (def.isTemp() && !def.isKill() && def.tempId() < ctx.ssa_infos.size())
877*61046927SAndroid Build Coastguard Worker ctx.program->live.live_in[block_idx].insert(def.tempId());
878*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(block->instructions[idx++]));
879*61046927SAndroid Build Coastguard Worker }
880*61046927SAndroid Build Coastguard Worker
881*61046927SAndroid Build Coastguard Worker auto& current_spills = ctx.spills_exit[block_idx];
882*61046927SAndroid Build Coastguard Worker
883*61046927SAndroid Build Coastguard Worker while (idx < block->instructions.size()) {
884*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& instr = block->instructions[idx];
885*61046927SAndroid Build Coastguard Worker
886*61046927SAndroid Build Coastguard Worker /* Spilling is handled as part of phis (they should always have the same or higher register
887*61046927SAndroid Build Coastguard Worker * demand). If we try to spill here, we might not be able to reduce the register demand enough
888*61046927SAndroid Build Coastguard Worker * because there is no path to spill constant/undef phi operands. */
889*61046927SAndroid Build Coastguard Worker if (instr->opcode == aco_opcode::p_branch) {
890*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(instr));
891*61046927SAndroid Build Coastguard Worker idx++;
892*61046927SAndroid Build Coastguard Worker continue;
893*61046927SAndroid Build Coastguard Worker }
894*61046927SAndroid Build Coastguard Worker
895*61046927SAndroid Build Coastguard Worker std::map<Temp, std::pair<Temp, uint32_t>> reloads;
896*61046927SAndroid Build Coastguard Worker
897*61046927SAndroid Build Coastguard Worker /* rename and reload operands */
898*61046927SAndroid Build Coastguard Worker for (Operand& op : instr->operands) {
899*61046927SAndroid Build Coastguard Worker if (!op.isTemp())
900*61046927SAndroid Build Coastguard Worker continue;
901*61046927SAndroid Build Coastguard Worker
902*61046927SAndroid Build Coastguard Worker if (op.isFirstKill())
903*61046927SAndroid Build Coastguard Worker ctx.program->live.live_in[block_idx].erase(op.tempId());
904*61046927SAndroid Build Coastguard Worker ctx.ssa_infos[op.tempId()].num_uses--;
905*61046927SAndroid Build Coastguard Worker
906*61046927SAndroid Build Coastguard Worker if (!current_spills.count(op.getTemp()))
907*61046927SAndroid Build Coastguard Worker continue;
908*61046927SAndroid Build Coastguard Worker
909*61046927SAndroid Build Coastguard Worker /* the Operand is spilled: add it to reloads */
910*61046927SAndroid Build Coastguard Worker Temp new_tmp = ctx.program->allocateTmp(op.regClass());
911*61046927SAndroid Build Coastguard Worker ctx.renames[block_idx][op.getTemp()] = new_tmp;
912*61046927SAndroid Build Coastguard Worker reloads[new_tmp] = std::make_pair(op.getTemp(), current_spills[op.getTemp()]);
913*61046927SAndroid Build Coastguard Worker current_spills.erase(op.getTemp());
914*61046927SAndroid Build Coastguard Worker spilled_registers -= new_tmp;
915*61046927SAndroid Build Coastguard Worker }
916*61046927SAndroid Build Coastguard Worker
917*61046927SAndroid Build Coastguard Worker /* check if register demand is low enough during and after the current instruction */
918*61046927SAndroid Build Coastguard Worker if (block->register_demand.exceeds(ctx.target_pressure)) {
919*61046927SAndroid Build Coastguard Worker RegisterDemand new_demand = instr->register_demand;
920*61046927SAndroid Build Coastguard Worker
921*61046927SAndroid Build Coastguard Worker /* if reg pressure is too high, spill variable with furthest next use */
922*61046927SAndroid Build Coastguard Worker while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {
923*61046927SAndroid Build Coastguard Worker float score = 0.0;
924*61046927SAndroid Build Coastguard Worker Temp to_spill;
925*61046927SAndroid Build Coastguard Worker unsigned do_rematerialize = 0;
926*61046927SAndroid Build Coastguard Worker unsigned avoid_respill = 0;
927*61046927SAndroid Build Coastguard Worker RegType type = RegType::sgpr;
928*61046927SAndroid Build Coastguard Worker if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr)
929*61046927SAndroid Build Coastguard Worker type = RegType::vgpr;
930*61046927SAndroid Build Coastguard Worker
931*61046927SAndroid Build Coastguard Worker for (unsigned t : ctx.program->live.live_in[block_idx]) {
932*61046927SAndroid Build Coastguard Worker RegClass rc = ctx.program->temp_rc[t];
933*61046927SAndroid Build Coastguard Worker Temp var = Temp(t, rc);
934*61046927SAndroid Build Coastguard Worker if (rc.type() != type || current_spills.count(var) || rc.is_linear_vgpr())
935*61046927SAndroid Build Coastguard Worker continue;
936*61046927SAndroid Build Coastguard Worker
937*61046927SAndroid Build Coastguard Worker unsigned can_rematerialize = ctx.remat.count(var);
938*61046927SAndroid Build Coastguard Worker unsigned loop_variable = block->loop_nest_depth && ctx.loop.back().spills.count(var);
939*61046927SAndroid Build Coastguard Worker if (avoid_respill > loop_variable || do_rematerialize > can_rematerialize)
940*61046927SAndroid Build Coastguard Worker continue;
941*61046927SAndroid Build Coastguard Worker
942*61046927SAndroid Build Coastguard Worker if (can_rematerialize > do_rematerialize || loop_variable > avoid_respill ||
943*61046927SAndroid Build Coastguard Worker ctx.ssa_infos[t].score() > score) {
944*61046927SAndroid Build Coastguard Worker /* Don't spill operands */
945*61046927SAndroid Build Coastguard Worker if (std::any_of(instr->operands.begin(), instr->operands.end(),
946*61046927SAndroid Build Coastguard Worker [&](Operand& op) { return op.isTemp() && op.getTemp() == var; }))
947*61046927SAndroid Build Coastguard Worker continue;
948*61046927SAndroid Build Coastguard Worker
949*61046927SAndroid Build Coastguard Worker to_spill = var;
950*61046927SAndroid Build Coastguard Worker score = ctx.ssa_infos[t].score();
951*61046927SAndroid Build Coastguard Worker do_rematerialize = can_rematerialize;
952*61046927SAndroid Build Coastguard Worker avoid_respill = loop_variable;
953*61046927SAndroid Build Coastguard Worker }
954*61046927SAndroid Build Coastguard Worker }
955*61046927SAndroid Build Coastguard Worker assert(score != 0.0);
956*61046927SAndroid Build Coastguard Worker
957*61046927SAndroid Build Coastguard Worker if (avoid_respill) {
958*61046927SAndroid Build Coastguard Worker /* This variable is spilled at the loop-header of the current loop.
959*61046927SAndroid Build Coastguard Worker * Re-use the spill-slot in order to avoid an extra store.
960*61046927SAndroid Build Coastguard Worker */
961*61046927SAndroid Build Coastguard Worker current_spills[to_spill] = ctx.loop.back().spills[to_spill];
962*61046927SAndroid Build Coastguard Worker spilled_registers += to_spill;
963*61046927SAndroid Build Coastguard Worker continue;
964*61046927SAndroid Build Coastguard Worker }
965*61046927SAndroid Build Coastguard Worker
966*61046927SAndroid Build Coastguard Worker uint32_t spill_id = ctx.add_to_spills(to_spill, current_spills);
967*61046927SAndroid Build Coastguard Worker /* add interferences with reloads */
968*61046927SAndroid Build Coastguard Worker for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads)
969*61046927SAndroid Build Coastguard Worker ctx.add_interference(spill_id, pair.second.second);
970*61046927SAndroid Build Coastguard Worker
971*61046927SAndroid Build Coastguard Worker spilled_registers += to_spill;
972*61046927SAndroid Build Coastguard Worker
973*61046927SAndroid Build Coastguard Worker /* rename if necessary */
974*61046927SAndroid Build Coastguard Worker if (ctx.renames[block_idx].count(to_spill)) {
975*61046927SAndroid Build Coastguard Worker to_spill = ctx.renames[block_idx][to_spill];
976*61046927SAndroid Build Coastguard Worker }
977*61046927SAndroid Build Coastguard Worker
978*61046927SAndroid Build Coastguard Worker /* add spill to new instructions */
979*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> spill{
980*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
981*61046927SAndroid Build Coastguard Worker spill->operands[0] = Operand(to_spill);
982*61046927SAndroid Build Coastguard Worker spill->operands[1] = Operand::c32(spill_id);
983*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(spill));
984*61046927SAndroid Build Coastguard Worker }
985*61046927SAndroid Build Coastguard Worker }
986*61046927SAndroid Build Coastguard Worker
987*61046927SAndroid Build Coastguard Worker for (const Definition& def : instr->definitions) {
988*61046927SAndroid Build Coastguard Worker if (def.isTemp() && !def.isKill())
989*61046927SAndroid Build Coastguard Worker ctx.program->live.live_in[block_idx].insert(def.tempId());
990*61046927SAndroid Build Coastguard Worker }
991*61046927SAndroid Build Coastguard Worker /* rename operands */
992*61046927SAndroid Build Coastguard Worker for (Operand& op : instr->operands) {
993*61046927SAndroid Build Coastguard Worker if (op.isTemp()) {
994*61046927SAndroid Build Coastguard Worker auto rename_it = ctx.renames[block_idx].find(op.getTemp());
995*61046927SAndroid Build Coastguard Worker if (rename_it != ctx.renames[block_idx].end()) {
996*61046927SAndroid Build Coastguard Worker op.setTemp(rename_it->second);
997*61046927SAndroid Build Coastguard Worker } else {
998*61046927SAndroid Build Coastguard Worker /* prevent its defining instruction from being DCE'd if it could be rematerialized */
999*61046927SAndroid Build Coastguard Worker auto remat_it = ctx.remat.find(op.getTemp());
1000*61046927SAndroid Build Coastguard Worker if (remat_it != ctx.remat.end()) {
1001*61046927SAndroid Build Coastguard Worker ctx.unused_remats.erase(remat_it->second.instr);
1002*61046927SAndroid Build Coastguard Worker }
1003*61046927SAndroid Build Coastguard Worker }
1004*61046927SAndroid Build Coastguard Worker }
1005*61046927SAndroid Build Coastguard Worker }
1006*61046927SAndroid Build Coastguard Worker
1007*61046927SAndroid Build Coastguard Worker /* add reloads and instruction to new instructions */
1008*61046927SAndroid Build Coastguard Worker for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads) {
1009*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> reload =
1010*61046927SAndroid Build Coastguard Worker do_reload(ctx, pair.second.first, pair.first, pair.second.second);
1011*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(reload));
1012*61046927SAndroid Build Coastguard Worker }
1013*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(instr));
1014*61046927SAndroid Build Coastguard Worker idx++;
1015*61046927SAndroid Build Coastguard Worker }
1016*61046927SAndroid Build Coastguard Worker
1017*61046927SAndroid Build Coastguard Worker block->instructions = std::move(instructions);
1018*61046927SAndroid Build Coastguard Worker }
1019*61046927SAndroid Build Coastguard Worker
1020*61046927SAndroid Build Coastguard Worker void
spill_block(spill_ctx & ctx,unsigned block_idx)1021*61046927SAndroid Build Coastguard Worker spill_block(spill_ctx& ctx, unsigned block_idx)
1022*61046927SAndroid Build Coastguard Worker {
1023*61046927SAndroid Build Coastguard Worker Block* block = &ctx.program->blocks[block_idx];
1024*61046927SAndroid Build Coastguard Worker
1025*61046927SAndroid Build Coastguard Worker /* determine set of variables which are spilled at the beginning of the block */
1026*61046927SAndroid Build Coastguard Worker RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx);
1027*61046927SAndroid Build Coastguard Worker
1028*61046927SAndroid Build Coastguard Worker if (!(block->kind & block_kind_loop_header)) {
1029*61046927SAndroid Build Coastguard Worker /* add spill/reload code on incoming control flow edges */
1030*61046927SAndroid Build Coastguard Worker add_coupling_code(ctx, block, ctx.program->live.live_in[block_idx]);
1031*61046927SAndroid Build Coastguard Worker }
1032*61046927SAndroid Build Coastguard Worker
1033*61046927SAndroid Build Coastguard Worker assert(ctx.spills_exit[block_idx].empty());
1034*61046927SAndroid Build Coastguard Worker ctx.spills_exit[block_idx] = ctx.spills_entry[block_idx];
1035*61046927SAndroid Build Coastguard Worker process_block(ctx, block_idx, block, spilled_registers);
1036*61046927SAndroid Build Coastguard Worker
1037*61046927SAndroid Build Coastguard Worker ctx.processed[block_idx] = true;
1038*61046927SAndroid Build Coastguard Worker
1039*61046927SAndroid Build Coastguard Worker /* check if the next block leaves the current loop */
1040*61046927SAndroid Build Coastguard Worker if (block->loop_nest_depth == 0 ||
1041*61046927SAndroid Build Coastguard Worker ctx.program->blocks[block_idx + 1].loop_nest_depth >= block->loop_nest_depth)
1042*61046927SAndroid Build Coastguard Worker return;
1043*61046927SAndroid Build Coastguard Worker
1044*61046927SAndroid Build Coastguard Worker uint32_t loop_header_idx = ctx.loop.back().index;
1045*61046927SAndroid Build Coastguard Worker
1046*61046927SAndroid Build Coastguard Worker /* preserve original renames at end of loop header block */
1047*61046927SAndroid Build Coastguard Worker aco::map<Temp, Temp> renames = std::move(ctx.renames[loop_header_idx]);
1048*61046927SAndroid Build Coastguard Worker
1049*61046927SAndroid Build Coastguard Worker /* add coupling code to all loop header predecessors */
1050*61046927SAndroid Build Coastguard Worker for (unsigned t : ctx.loop.back().live_in)
1051*61046927SAndroid Build Coastguard Worker ctx.ssa_infos[t].num_uses--;
1052*61046927SAndroid Build Coastguard Worker add_coupling_code(ctx, &ctx.program->blocks[loop_header_idx], ctx.loop.back().live_in);
1053*61046927SAndroid Build Coastguard Worker renames.swap(ctx.renames[loop_header_idx]);
1054*61046927SAndroid Build Coastguard Worker
1055*61046927SAndroid Build Coastguard Worker /* remove loop header info from stack */
1056*61046927SAndroid Build Coastguard Worker ctx.loop.pop_back();
1057*61046927SAndroid Build Coastguard Worker if (renames.empty())
1058*61046927SAndroid Build Coastguard Worker return;
1059*61046927SAndroid Build Coastguard Worker
1060*61046927SAndroid Build Coastguard Worker /* Add the new renames to each block */
1061*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, Temp> rename : renames) {
1062*61046927SAndroid Build Coastguard Worker /* If there is already a rename, don't overwrite it. */
1063*61046927SAndroid Build Coastguard Worker for (unsigned idx = loop_header_idx; idx <= block_idx; idx++)
1064*61046927SAndroid Build Coastguard Worker ctx.renames[idx].insert(rename);
1065*61046927SAndroid Build Coastguard Worker }
1066*61046927SAndroid Build Coastguard Worker
1067*61046927SAndroid Build Coastguard Worker /* propagate new renames through loop: i.e. repair the SSA */
1068*61046927SAndroid Build Coastguard Worker for (unsigned idx = loop_header_idx; idx <= block_idx; idx++) {
1069*61046927SAndroid Build Coastguard Worker Block& current = ctx.program->blocks[idx];
1070*61046927SAndroid Build Coastguard Worker /* rename all uses in this block */
1071*61046927SAndroid Build Coastguard Worker for (aco_ptr<Instruction>& instr : current.instructions) {
1072*61046927SAndroid Build Coastguard Worker /* no need to rename the loop header phis once again. */
1073*61046927SAndroid Build Coastguard Worker if (idx == loop_header_idx && is_phi(instr))
1074*61046927SAndroid Build Coastguard Worker continue;
1075*61046927SAndroid Build Coastguard Worker
1076*61046927SAndroid Build Coastguard Worker for (Operand& op : instr->operands) {
1077*61046927SAndroid Build Coastguard Worker if (!op.isTemp())
1078*61046927SAndroid Build Coastguard Worker continue;
1079*61046927SAndroid Build Coastguard Worker
1080*61046927SAndroid Build Coastguard Worker auto rename = renames.find(op.getTemp());
1081*61046927SAndroid Build Coastguard Worker if (rename != renames.end())
1082*61046927SAndroid Build Coastguard Worker op.setTemp(rename->second);
1083*61046927SAndroid Build Coastguard Worker }
1084*61046927SAndroid Build Coastguard Worker }
1085*61046927SAndroid Build Coastguard Worker }
1086*61046927SAndroid Build Coastguard Worker }
1087*61046927SAndroid Build Coastguard Worker
1088*61046927SAndroid Build Coastguard Worker Temp
load_scratch_resource(spill_ctx & ctx,Builder & bld,bool apply_scratch_offset)1089*61046927SAndroid Build Coastguard Worker load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
1090*61046927SAndroid Build Coastguard Worker {
1091*61046927SAndroid Build Coastguard Worker Temp private_segment_buffer = ctx.program->private_segment_buffer;
1092*61046927SAndroid Build Coastguard Worker if (!private_segment_buffer.bytes()) {
1093*61046927SAndroid Build Coastguard Worker Temp addr_lo =
1094*61046927SAndroid Build Coastguard Worker bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
1095*61046927SAndroid Build Coastguard Worker Temp addr_hi =
1096*61046927SAndroid Build Coastguard Worker bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
1097*61046927SAndroid Build Coastguard Worker private_segment_buffer =
1098*61046927SAndroid Build Coastguard Worker bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
1099*61046927SAndroid Build Coastguard Worker } else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
1100*61046927SAndroid Build Coastguard Worker private_segment_buffer =
1101*61046927SAndroid Build Coastguard Worker bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
1102*61046927SAndroid Build Coastguard Worker }
1103*61046927SAndroid Build Coastguard Worker
1104*61046927SAndroid Build Coastguard Worker if (apply_scratch_offset) {
1105*61046927SAndroid Build Coastguard Worker Temp addr_lo = bld.tmp(s1);
1106*61046927SAndroid Build Coastguard Worker Temp addr_hi = bld.tmp(s1);
1107*61046927SAndroid Build Coastguard Worker bld.pseudo(aco_opcode::p_split_vector, Definition(addr_lo), Definition(addr_hi),
1108*61046927SAndroid Build Coastguard Worker private_segment_buffer);
1109*61046927SAndroid Build Coastguard Worker
1110*61046927SAndroid Build Coastguard Worker Temp carry = bld.tmp(s1);
1111*61046927SAndroid Build Coastguard Worker addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
1112*61046927SAndroid Build Coastguard Worker ctx.program->scratch_offset);
1113*61046927SAndroid Build Coastguard Worker addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
1114*61046927SAndroid Build Coastguard Worker Operand::c32(0), bld.scc(carry));
1115*61046927SAndroid Build Coastguard Worker
1116*61046927SAndroid Build Coastguard Worker private_segment_buffer =
1117*61046927SAndroid Build Coastguard Worker bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
1118*61046927SAndroid Build Coastguard Worker }
1119*61046927SAndroid Build Coastguard Worker
1120*61046927SAndroid Build Coastguard Worker struct ac_buffer_state ac_state = {0};
1121*61046927SAndroid Build Coastguard Worker uint32_t desc[4];
1122*61046927SAndroid Build Coastguard Worker
1123*61046927SAndroid Build Coastguard Worker ac_state.size = 0xffffffff;
1124*61046927SAndroid Build Coastguard Worker ac_state.format = PIPE_FORMAT_R32_FLOAT;
1125*61046927SAndroid Build Coastguard Worker for (int i = 0; i < 4; i++)
1126*61046927SAndroid Build Coastguard Worker ac_state.swizzle[i] = PIPE_SWIZZLE_0;
1127*61046927SAndroid Build Coastguard Worker /* older generations need element size = 4 bytes. element size removed in GFX9 */
1128*61046927SAndroid Build Coastguard Worker ac_state.element_size = ctx.program->gfx_level <= GFX8 ? 1u : 0u;
1129*61046927SAndroid Build Coastguard Worker ac_state.index_stride = ctx.program->wave_size == 64 ? 3u : 2u;
1130*61046927SAndroid Build Coastguard Worker ac_state.add_tid = true;
1131*61046927SAndroid Build Coastguard Worker ac_state.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW;
1132*61046927SAndroid Build Coastguard Worker
1133*61046927SAndroid Build Coastguard Worker ac_build_buffer_descriptor(ctx.program->gfx_level, &ac_state, desc);
1134*61046927SAndroid Build Coastguard Worker
1135*61046927SAndroid Build Coastguard Worker return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
1136*61046927SAndroid Build Coastguard Worker Operand::c32(desc[2]), Operand::c32(desc[3]));
1137*61046927SAndroid Build Coastguard Worker }
1138*61046927SAndroid Build Coastguard Worker
1139*61046927SAndroid Build Coastguard Worker void
setup_vgpr_spill_reload(spill_ctx & ctx,Block & block,std::vector<aco_ptr<Instruction>> & instructions,uint32_t spill_slot,Temp & scratch_offset,unsigned * offset)1140*61046927SAndroid Build Coastguard Worker setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
1141*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>& instructions, uint32_t spill_slot,
1142*61046927SAndroid Build Coastguard Worker Temp& scratch_offset, unsigned* offset)
1143*61046927SAndroid Build Coastguard Worker {
1144*61046927SAndroid Build Coastguard Worker uint32_t scratch_size = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
1145*61046927SAndroid Build Coastguard Worker
1146*61046927SAndroid Build Coastguard Worker uint32_t offset_range;
1147*61046927SAndroid Build Coastguard Worker if (ctx.program->gfx_level >= GFX9) {
1148*61046927SAndroid Build Coastguard Worker offset_range =
1149*61046927SAndroid Build Coastguard Worker ctx.program->dev.scratch_global_offset_max - ctx.program->dev.scratch_global_offset_min;
1150*61046927SAndroid Build Coastguard Worker } else {
1151*61046927SAndroid Build Coastguard Worker if (scratch_size < 4095)
1152*61046927SAndroid Build Coastguard Worker offset_range = 4095 - scratch_size;
1153*61046927SAndroid Build Coastguard Worker else
1154*61046927SAndroid Build Coastguard Worker offset_range = 0;
1155*61046927SAndroid Build Coastguard Worker }
1156*61046927SAndroid Build Coastguard Worker
1157*61046927SAndroid Build Coastguard Worker bool overflow = (ctx.vgpr_spill_slots - 1) * 4 > offset_range;
1158*61046927SAndroid Build Coastguard Worker
1159*61046927SAndroid Build Coastguard Worker Builder rsrc_bld(ctx.program);
1160*61046927SAndroid Build Coastguard Worker if (block.kind & block_kind_top_level) {
1161*61046927SAndroid Build Coastguard Worker rsrc_bld.reset(&instructions);
1162*61046927SAndroid Build Coastguard Worker } else if (ctx.scratch_rsrc == Temp() && (!overflow || ctx.program->gfx_level < GFX9)) {
1163*61046927SAndroid Build Coastguard Worker Block* tl_block = █
1164*61046927SAndroid Build Coastguard Worker while (!(tl_block->kind & block_kind_top_level))
1165*61046927SAndroid Build Coastguard Worker tl_block = &ctx.program->blocks[tl_block->linear_idom];
1166*61046927SAndroid Build Coastguard Worker
1167*61046927SAndroid Build Coastguard Worker /* find p_logical_end */
1168*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>& prev_instructions = tl_block->instructions;
1169*61046927SAndroid Build Coastguard Worker unsigned idx = prev_instructions.size() - 1;
1170*61046927SAndroid Build Coastguard Worker while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
1171*61046927SAndroid Build Coastguard Worker idx--;
1172*61046927SAndroid Build Coastguard Worker rsrc_bld.reset(&prev_instructions, std::next(prev_instructions.begin(), idx));
1173*61046927SAndroid Build Coastguard Worker }
1174*61046927SAndroid Build Coastguard Worker
1175*61046927SAndroid Build Coastguard Worker /* If spilling overflows the constant offset range at any point, we need to emit the soffset
1176*61046927SAndroid Build Coastguard Worker * before every spill/reload to avoid increasing register demand.
1177*61046927SAndroid Build Coastguard Worker */
1178*61046927SAndroid Build Coastguard Worker Builder offset_bld = rsrc_bld;
1179*61046927SAndroid Build Coastguard Worker if (overflow)
1180*61046927SAndroid Build Coastguard Worker offset_bld.reset(&instructions);
1181*61046927SAndroid Build Coastguard Worker
1182*61046927SAndroid Build Coastguard Worker *offset = spill_slot * 4;
1183*61046927SAndroid Build Coastguard Worker if (ctx.program->gfx_level >= GFX9) {
1184*61046927SAndroid Build Coastguard Worker *offset += ctx.program->dev.scratch_global_offset_min;
1185*61046927SAndroid Build Coastguard Worker
1186*61046927SAndroid Build Coastguard Worker if (ctx.scratch_rsrc == Temp() || overflow) {
1187*61046927SAndroid Build Coastguard Worker int32_t saddr = scratch_size - ctx.program->dev.scratch_global_offset_min;
1188*61046927SAndroid Build Coastguard Worker if ((int32_t)*offset > (int32_t)ctx.program->dev.scratch_global_offset_max) {
1189*61046927SAndroid Build Coastguard Worker saddr += (int32_t)*offset;
1190*61046927SAndroid Build Coastguard Worker *offset = 0;
1191*61046927SAndroid Build Coastguard Worker }
1192*61046927SAndroid Build Coastguard Worker
1193*61046927SAndroid Build Coastguard Worker /* GFX9+ uses scratch_* instructions, which don't use a resource. */
1194*61046927SAndroid Build Coastguard Worker ctx.scratch_rsrc = offset_bld.copy(offset_bld.def(s1), Operand::c32(saddr));
1195*61046927SAndroid Build Coastguard Worker }
1196*61046927SAndroid Build Coastguard Worker } else {
1197*61046927SAndroid Build Coastguard Worker if (ctx.scratch_rsrc == Temp())
1198*61046927SAndroid Build Coastguard Worker ctx.scratch_rsrc = load_scratch_resource(ctx, rsrc_bld, overflow);
1199*61046927SAndroid Build Coastguard Worker
1200*61046927SAndroid Build Coastguard Worker if (overflow) {
1201*61046927SAndroid Build Coastguard Worker uint32_t soffset =
1202*61046927SAndroid Build Coastguard Worker ctx.program->config->scratch_bytes_per_wave + *offset * ctx.program->wave_size;
1203*61046927SAndroid Build Coastguard Worker *offset = 0;
1204*61046927SAndroid Build Coastguard Worker
1205*61046927SAndroid Build Coastguard Worker scratch_offset = offset_bld.copy(offset_bld.def(s1), Operand::c32(soffset));
1206*61046927SAndroid Build Coastguard Worker } else {
1207*61046927SAndroid Build Coastguard Worker *offset += scratch_size;
1208*61046927SAndroid Build Coastguard Worker }
1209*61046927SAndroid Build Coastguard Worker }
1210*61046927SAndroid Build Coastguard Worker }
1211*61046927SAndroid Build Coastguard Worker
1212*61046927SAndroid Build Coastguard Worker void
spill_vgpr(spill_ctx & ctx,Block & block,std::vector<aco_ptr<Instruction>> & instructions,aco_ptr<Instruction> & spill,std::vector<uint32_t> & slots)1213*61046927SAndroid Build Coastguard Worker spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& instructions,
1214*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& spill, std::vector<uint32_t>& slots)
1215*61046927SAndroid Build Coastguard Worker {
1216*61046927SAndroid Build Coastguard Worker ctx.program->config->spilled_vgprs += spill->operands[0].size();
1217*61046927SAndroid Build Coastguard Worker
1218*61046927SAndroid Build Coastguard Worker uint32_t spill_id = spill->operands[1].constantValue();
1219*61046927SAndroid Build Coastguard Worker uint32_t spill_slot = slots[spill_id];
1220*61046927SAndroid Build Coastguard Worker
1221*61046927SAndroid Build Coastguard Worker Temp scratch_offset = ctx.program->scratch_offset;
1222*61046927SAndroid Build Coastguard Worker unsigned offset;
1223*61046927SAndroid Build Coastguard Worker setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
1224*61046927SAndroid Build Coastguard Worker
1225*61046927SAndroid Build Coastguard Worker assert(spill->operands[0].isTemp());
1226*61046927SAndroid Build Coastguard Worker Temp temp = spill->operands[0].getTemp();
1227*61046927SAndroid Build Coastguard Worker assert(temp.type() == RegType::vgpr && !temp.is_linear());
1228*61046927SAndroid Build Coastguard Worker
1229*61046927SAndroid Build Coastguard Worker Builder bld(ctx.program, &instructions);
1230*61046927SAndroid Build Coastguard Worker if (temp.size() > 1) {
1231*61046927SAndroid Build Coastguard Worker Instruction* split{
1232*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
1233*61046927SAndroid Build Coastguard Worker split->operands[0] = Operand(temp);
1234*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < temp.size(); i++)
1235*61046927SAndroid Build Coastguard Worker split->definitions[i] = bld.def(v1);
1236*61046927SAndroid Build Coastguard Worker bld.insert(split);
1237*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < temp.size(); i++, offset += 4) {
1238*61046927SAndroid Build Coastguard Worker Temp elem = split->definitions[i].getTemp();
1239*61046927SAndroid Build Coastguard Worker if (ctx.program->gfx_level >= GFX9) {
1240*61046927SAndroid Build Coastguard Worker bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, elem,
1241*61046927SAndroid Build Coastguard Worker offset, memory_sync_info(storage_vgpr_spill, semantic_private));
1242*61046927SAndroid Build Coastguard Worker } else {
1243*61046927SAndroid Build Coastguard Worker Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
1244*61046927SAndroid Build Coastguard Worker Operand(v1), scratch_offset, elem, offset, false);
1245*61046927SAndroid Build Coastguard Worker instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1246*61046927SAndroid Build Coastguard Worker instr->mubuf().cache.value = ac_swizzled;
1247*61046927SAndroid Build Coastguard Worker }
1248*61046927SAndroid Build Coastguard Worker }
1249*61046927SAndroid Build Coastguard Worker } else if (ctx.program->gfx_level >= GFX9) {
1250*61046927SAndroid Build Coastguard Worker bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, temp, offset,
1251*61046927SAndroid Build Coastguard Worker memory_sync_info(storage_vgpr_spill, semantic_private));
1252*61046927SAndroid Build Coastguard Worker } else {
1253*61046927SAndroid Build Coastguard Worker Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
1254*61046927SAndroid Build Coastguard Worker scratch_offset, temp, offset, false);
1255*61046927SAndroid Build Coastguard Worker instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1256*61046927SAndroid Build Coastguard Worker instr->mubuf().cache.value = ac_swizzled;
1257*61046927SAndroid Build Coastguard Worker }
1258*61046927SAndroid Build Coastguard Worker }
1259*61046927SAndroid Build Coastguard Worker
1260*61046927SAndroid Build Coastguard Worker void
reload_vgpr(spill_ctx & ctx,Block & block,std::vector<aco_ptr<Instruction>> & instructions,aco_ptr<Instruction> & reload,std::vector<uint32_t> & slots)1261*61046927SAndroid Build Coastguard Worker reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& instructions,
1262*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>& reload, std::vector<uint32_t>& slots)
1263*61046927SAndroid Build Coastguard Worker {
1264*61046927SAndroid Build Coastguard Worker uint32_t spill_id = reload->operands[0].constantValue();
1265*61046927SAndroid Build Coastguard Worker uint32_t spill_slot = slots[spill_id];
1266*61046927SAndroid Build Coastguard Worker
1267*61046927SAndroid Build Coastguard Worker Temp scratch_offset = ctx.program->scratch_offset;
1268*61046927SAndroid Build Coastguard Worker unsigned offset;
1269*61046927SAndroid Build Coastguard Worker setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
1270*61046927SAndroid Build Coastguard Worker
1271*61046927SAndroid Build Coastguard Worker Definition def = reload->definitions[0];
1272*61046927SAndroid Build Coastguard Worker
1273*61046927SAndroid Build Coastguard Worker Builder bld(ctx.program, &instructions);
1274*61046927SAndroid Build Coastguard Worker if (def.size() > 1) {
1275*61046927SAndroid Build Coastguard Worker Instruction* vec{
1276*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
1277*61046927SAndroid Build Coastguard Worker vec->definitions[0] = def;
1278*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < def.size(); i++, offset += 4) {
1279*61046927SAndroid Build Coastguard Worker Temp tmp = bld.tmp(v1);
1280*61046927SAndroid Build Coastguard Worker vec->operands[i] = Operand(tmp);
1281*61046927SAndroid Build Coastguard Worker if (ctx.program->gfx_level >= GFX9) {
1282*61046927SAndroid Build Coastguard Worker bld.scratch(aco_opcode::scratch_load_dword, Definition(tmp), Operand(v1),
1283*61046927SAndroid Build Coastguard Worker ctx.scratch_rsrc, offset,
1284*61046927SAndroid Build Coastguard Worker memory_sync_info(storage_vgpr_spill, semantic_private));
1285*61046927SAndroid Build Coastguard Worker } else {
1286*61046927SAndroid Build Coastguard Worker Instruction* instr =
1287*61046927SAndroid Build Coastguard Worker bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
1288*61046927SAndroid Build Coastguard Worker Operand(v1), scratch_offset, offset, false);
1289*61046927SAndroid Build Coastguard Worker instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1290*61046927SAndroid Build Coastguard Worker instr->mubuf().cache.value = ac_swizzled;
1291*61046927SAndroid Build Coastguard Worker }
1292*61046927SAndroid Build Coastguard Worker }
1293*61046927SAndroid Build Coastguard Worker bld.insert(vec);
1294*61046927SAndroid Build Coastguard Worker } else if (ctx.program->gfx_level >= GFX9) {
1295*61046927SAndroid Build Coastguard Worker bld.scratch(aco_opcode::scratch_load_dword, def, Operand(v1), ctx.scratch_rsrc, offset,
1296*61046927SAndroid Build Coastguard Worker memory_sync_info(storage_vgpr_spill, semantic_private));
1297*61046927SAndroid Build Coastguard Worker } else {
1298*61046927SAndroid Build Coastguard Worker Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc,
1299*61046927SAndroid Build Coastguard Worker Operand(v1), scratch_offset, offset, false);
1300*61046927SAndroid Build Coastguard Worker instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1301*61046927SAndroid Build Coastguard Worker instr->mubuf().cache.value = ac_swizzled;
1302*61046927SAndroid Build Coastguard Worker }
1303*61046927SAndroid Build Coastguard Worker }
1304*61046927SAndroid Build Coastguard Worker
1305*61046927SAndroid Build Coastguard Worker void
add_interferences(spill_ctx & ctx,std::vector<bool> & is_assigned,std::vector<uint32_t> & slots,std::vector<bool> & slots_used,unsigned id)1306*61046927SAndroid Build Coastguard Worker add_interferences(spill_ctx& ctx, std::vector<bool>& is_assigned, std::vector<uint32_t>& slots,
1307*61046927SAndroid Build Coastguard Worker std::vector<bool>& slots_used, unsigned id)
1308*61046927SAndroid Build Coastguard Worker {
1309*61046927SAndroid Build Coastguard Worker for (unsigned other : ctx.interferences[id].second) {
1310*61046927SAndroid Build Coastguard Worker if (!is_assigned[other])
1311*61046927SAndroid Build Coastguard Worker continue;
1312*61046927SAndroid Build Coastguard Worker
1313*61046927SAndroid Build Coastguard Worker RegClass other_rc = ctx.interferences[other].first;
1314*61046927SAndroid Build Coastguard Worker unsigned slot = slots[other];
1315*61046927SAndroid Build Coastguard Worker std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
1316*61046927SAndroid Build Coastguard Worker }
1317*61046927SAndroid Build Coastguard Worker }
1318*61046927SAndroid Build Coastguard Worker
1319*61046927SAndroid Build Coastguard Worker unsigned
find_available_slot(std::vector<bool> & used,unsigned wave_size,unsigned size,bool is_sgpr)1320*61046927SAndroid Build Coastguard Worker find_available_slot(std::vector<bool>& used, unsigned wave_size, unsigned size, bool is_sgpr)
1321*61046927SAndroid Build Coastguard Worker {
1322*61046927SAndroid Build Coastguard Worker unsigned wave_size_minus_one = wave_size - 1;
1323*61046927SAndroid Build Coastguard Worker unsigned slot = 0;
1324*61046927SAndroid Build Coastguard Worker
1325*61046927SAndroid Build Coastguard Worker while (true) {
1326*61046927SAndroid Build Coastguard Worker bool available = true;
1327*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < size; i++) {
1328*61046927SAndroid Build Coastguard Worker if (slot + i < used.size() && used[slot + i]) {
1329*61046927SAndroid Build Coastguard Worker available = false;
1330*61046927SAndroid Build Coastguard Worker break;
1331*61046927SAndroid Build Coastguard Worker }
1332*61046927SAndroid Build Coastguard Worker }
1333*61046927SAndroid Build Coastguard Worker if (!available) {
1334*61046927SAndroid Build Coastguard Worker slot++;
1335*61046927SAndroid Build Coastguard Worker continue;
1336*61046927SAndroid Build Coastguard Worker }
1337*61046927SAndroid Build Coastguard Worker
1338*61046927SAndroid Build Coastguard Worker if (is_sgpr && ((slot & wave_size_minus_one) > wave_size - size)) {
1339*61046927SAndroid Build Coastguard Worker slot = align(slot, wave_size);
1340*61046927SAndroid Build Coastguard Worker continue;
1341*61046927SAndroid Build Coastguard Worker }
1342*61046927SAndroid Build Coastguard Worker
1343*61046927SAndroid Build Coastguard Worker std::fill(used.begin(), used.end(), false);
1344*61046927SAndroid Build Coastguard Worker
1345*61046927SAndroid Build Coastguard Worker if (slot + size > used.size())
1346*61046927SAndroid Build Coastguard Worker used.resize(slot + size);
1347*61046927SAndroid Build Coastguard Worker
1348*61046927SAndroid Build Coastguard Worker return slot;
1349*61046927SAndroid Build Coastguard Worker }
1350*61046927SAndroid Build Coastguard Worker }
1351*61046927SAndroid Build Coastguard Worker
1352*61046927SAndroid Build Coastguard Worker void
assign_spill_slots_helper(spill_ctx & ctx,RegType type,std::vector<bool> & is_assigned,std::vector<uint32_t> & slots,unsigned * num_slots)1353*61046927SAndroid Build Coastguard Worker assign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector<bool>& is_assigned,
1354*61046927SAndroid Build Coastguard Worker std::vector<uint32_t>& slots, unsigned* num_slots)
1355*61046927SAndroid Build Coastguard Worker {
1356*61046927SAndroid Build Coastguard Worker std::vector<bool> slots_used;
1357*61046927SAndroid Build Coastguard Worker
1358*61046927SAndroid Build Coastguard Worker /* assign slots for ids with affinities first */
1359*61046927SAndroid Build Coastguard Worker for (std::vector<uint32_t>& vec : ctx.affinities) {
1360*61046927SAndroid Build Coastguard Worker if (ctx.interferences[vec[0]].first.type() != type)
1361*61046927SAndroid Build Coastguard Worker continue;
1362*61046927SAndroid Build Coastguard Worker
1363*61046927SAndroid Build Coastguard Worker for (unsigned id : vec) {
1364*61046927SAndroid Build Coastguard Worker if (!ctx.is_reloaded[id])
1365*61046927SAndroid Build Coastguard Worker continue;
1366*61046927SAndroid Build Coastguard Worker
1367*61046927SAndroid Build Coastguard Worker add_interferences(ctx, is_assigned, slots, slots_used, id);
1368*61046927SAndroid Build Coastguard Worker }
1369*61046927SAndroid Build Coastguard Worker
1370*61046927SAndroid Build Coastguard Worker unsigned slot = find_available_slot(
1371*61046927SAndroid Build Coastguard Worker slots_used, ctx.wave_size, ctx.interferences[vec[0]].first.size(), type == RegType::sgpr);
1372*61046927SAndroid Build Coastguard Worker
1373*61046927SAndroid Build Coastguard Worker for (unsigned id : vec) {
1374*61046927SAndroid Build Coastguard Worker assert(!is_assigned[id]);
1375*61046927SAndroid Build Coastguard Worker
1376*61046927SAndroid Build Coastguard Worker if (ctx.is_reloaded[id]) {
1377*61046927SAndroid Build Coastguard Worker slots[id] = slot;
1378*61046927SAndroid Build Coastguard Worker is_assigned[id] = true;
1379*61046927SAndroid Build Coastguard Worker }
1380*61046927SAndroid Build Coastguard Worker }
1381*61046927SAndroid Build Coastguard Worker }
1382*61046927SAndroid Build Coastguard Worker
1383*61046927SAndroid Build Coastguard Worker /* assign slots for ids without affinities */
1384*61046927SAndroid Build Coastguard Worker for (unsigned id = 0; id < ctx.interferences.size(); id++) {
1385*61046927SAndroid Build Coastguard Worker if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type)
1386*61046927SAndroid Build Coastguard Worker continue;
1387*61046927SAndroid Build Coastguard Worker
1388*61046927SAndroid Build Coastguard Worker add_interferences(ctx, is_assigned, slots, slots_used, id);
1389*61046927SAndroid Build Coastguard Worker
1390*61046927SAndroid Build Coastguard Worker unsigned slot = find_available_slot(
1391*61046927SAndroid Build Coastguard Worker slots_used, ctx.wave_size, ctx.interferences[id].first.size(), type == RegType::sgpr);
1392*61046927SAndroid Build Coastguard Worker
1393*61046927SAndroid Build Coastguard Worker slots[id] = slot;
1394*61046927SAndroid Build Coastguard Worker is_assigned[id] = true;
1395*61046927SAndroid Build Coastguard Worker }
1396*61046927SAndroid Build Coastguard Worker
1397*61046927SAndroid Build Coastguard Worker *num_slots = slots_used.size();
1398*61046927SAndroid Build Coastguard Worker }
1399*61046927SAndroid Build Coastguard Worker
1400*61046927SAndroid Build Coastguard Worker void
end_unused_spill_vgprs(spill_ctx & ctx,Block & block,std::vector<Temp> & vgpr_spill_temps,const std::vector<uint32_t> & slots,const aco::unordered_map<Temp,uint32_t> & spills)1401*61046927SAndroid Build Coastguard Worker end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spill_temps,
1402*61046927SAndroid Build Coastguard Worker const std::vector<uint32_t>& slots,
1403*61046927SAndroid Build Coastguard Worker const aco::unordered_map<Temp, uint32_t>& spills)
1404*61046927SAndroid Build Coastguard Worker {
1405*61046927SAndroid Build Coastguard Worker std::vector<bool> is_used(vgpr_spill_temps.size());
1406*61046927SAndroid Build Coastguard Worker for (std::pair<Temp, uint32_t> pair : spills) {
1407*61046927SAndroid Build Coastguard Worker if (pair.first.type() == RegType::sgpr && ctx.is_reloaded[pair.second])
1408*61046927SAndroid Build Coastguard Worker is_used[slots[pair.second] / ctx.wave_size] = true;
1409*61046927SAndroid Build Coastguard Worker }
1410*61046927SAndroid Build Coastguard Worker
1411*61046927SAndroid Build Coastguard Worker std::vector<Temp> temps;
1412*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1413*61046927SAndroid Build Coastguard Worker if (vgpr_spill_temps[i].id() && !is_used[i]) {
1414*61046927SAndroid Build Coastguard Worker temps.push_back(vgpr_spill_temps[i]);
1415*61046927SAndroid Build Coastguard Worker vgpr_spill_temps[i] = Temp();
1416*61046927SAndroid Build Coastguard Worker }
1417*61046927SAndroid Build Coastguard Worker }
1418*61046927SAndroid Build Coastguard Worker if (temps.empty() || block.linear_preds.empty())
1419*61046927SAndroid Build Coastguard Worker return;
1420*61046927SAndroid Build Coastguard Worker
1421*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> destr{
1422*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)};
1423*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < temps.size(); i++)
1424*61046927SAndroid Build Coastguard Worker destr->operands[i] = Operand(temps[i]);
1425*61046927SAndroid Build Coastguard Worker
1426*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
1427*61046927SAndroid Build Coastguard Worker while (is_phi(*it))
1428*61046927SAndroid Build Coastguard Worker ++it;
1429*61046927SAndroid Build Coastguard Worker block.instructions.insert(it, std::move(destr));
1430*61046927SAndroid Build Coastguard Worker }
1431*61046927SAndroid Build Coastguard Worker
1432*61046927SAndroid Build Coastguard Worker void
assign_spill_slots(spill_ctx & ctx,unsigned spills_to_vgpr)1433*61046927SAndroid Build Coastguard Worker assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
1434*61046927SAndroid Build Coastguard Worker {
1435*61046927SAndroid Build Coastguard Worker std::vector<uint32_t> slots(ctx.interferences.size());
1436*61046927SAndroid Build Coastguard Worker std::vector<bool> is_assigned(ctx.interferences.size());
1437*61046927SAndroid Build Coastguard Worker
1438*61046927SAndroid Build Coastguard Worker /* first, handle affinities: just merge all interferences into both spill ids */
1439*61046927SAndroid Build Coastguard Worker for (std::vector<uint32_t>& vec : ctx.affinities) {
1440*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < vec.size(); i++) {
1441*61046927SAndroid Build Coastguard Worker for (unsigned j = i + 1; j < vec.size(); j++) {
1442*61046927SAndroid Build Coastguard Worker assert(vec[i] != vec[j]);
1443*61046927SAndroid Build Coastguard Worker bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]];
1444*61046927SAndroid Build Coastguard Worker ctx.is_reloaded[vec[i]] = reloaded;
1445*61046927SAndroid Build Coastguard Worker ctx.is_reloaded[vec[j]] = reloaded;
1446*61046927SAndroid Build Coastguard Worker }
1447*61046927SAndroid Build Coastguard Worker }
1448*61046927SAndroid Build Coastguard Worker }
1449*61046927SAndroid Build Coastguard Worker for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++)
1450*61046927SAndroid Build Coastguard Worker for (ASSERTED uint32_t id : ctx.interferences[i].second)
1451*61046927SAndroid Build Coastguard Worker assert(i != id);
1452*61046927SAndroid Build Coastguard Worker
1453*61046927SAndroid Build Coastguard Worker /* for each spill slot, assign as many spill ids as possible */
1454*61046927SAndroid Build Coastguard Worker assign_spill_slots_helper(ctx, RegType::sgpr, is_assigned, slots, &ctx.sgpr_spill_slots);
1455*61046927SAndroid Build Coastguard Worker assign_spill_slots_helper(ctx, RegType::vgpr, is_assigned, slots, &ctx.vgpr_spill_slots);
1456*61046927SAndroid Build Coastguard Worker
1457*61046927SAndroid Build Coastguard Worker for (unsigned id = 0; id < is_assigned.size(); id++)
1458*61046927SAndroid Build Coastguard Worker assert(is_assigned[id] || !ctx.is_reloaded[id]);
1459*61046927SAndroid Build Coastguard Worker
1460*61046927SAndroid Build Coastguard Worker for (std::vector<uint32_t>& vec : ctx.affinities) {
1461*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < vec.size(); i++) {
1462*61046927SAndroid Build Coastguard Worker for (unsigned j = i + 1; j < vec.size(); j++) {
1463*61046927SAndroid Build Coastguard Worker assert(is_assigned[vec[i]] == is_assigned[vec[j]]);
1464*61046927SAndroid Build Coastguard Worker if (!is_assigned[vec[i]])
1465*61046927SAndroid Build Coastguard Worker continue;
1466*61046927SAndroid Build Coastguard Worker assert(ctx.is_reloaded[vec[i]] == ctx.is_reloaded[vec[j]]);
1467*61046927SAndroid Build Coastguard Worker assert(ctx.interferences[vec[i]].first.type() ==
1468*61046927SAndroid Build Coastguard Worker ctx.interferences[vec[j]].first.type());
1469*61046927SAndroid Build Coastguard Worker assert(slots[vec[i]] == slots[vec[j]]);
1470*61046927SAndroid Build Coastguard Worker }
1471*61046927SAndroid Build Coastguard Worker }
1472*61046927SAndroid Build Coastguard Worker }
1473*61046927SAndroid Build Coastguard Worker
1474*61046927SAndroid Build Coastguard Worker /* hope, we didn't mess up */
1475*61046927SAndroid Build Coastguard Worker std::vector<Temp> vgpr_spill_temps((ctx.sgpr_spill_slots + ctx.wave_size - 1) / ctx.wave_size);
1476*61046927SAndroid Build Coastguard Worker assert(vgpr_spill_temps.size() <= spills_to_vgpr);
1477*61046927SAndroid Build Coastguard Worker
1478*61046927SAndroid Build Coastguard Worker /* replace pseudo instructions with actual hardware instructions */
1479*61046927SAndroid Build Coastguard Worker unsigned last_top_level_block_idx = 0;
1480*61046927SAndroid Build Coastguard Worker for (Block& block : ctx.program->blocks) {
1481*61046927SAndroid Build Coastguard Worker
1482*61046927SAndroid Build Coastguard Worker if (block.kind & block_kind_top_level) {
1483*61046927SAndroid Build Coastguard Worker last_top_level_block_idx = block.index;
1484*61046927SAndroid Build Coastguard Worker
1485*61046927SAndroid Build Coastguard Worker end_unused_spill_vgprs(ctx, block, vgpr_spill_temps, slots, ctx.spills_entry[block.index]);
1486*61046927SAndroid Build Coastguard Worker
1487*61046927SAndroid Build Coastguard Worker /* If the block has no predecessors (for example in RT resume shaders),
1488*61046927SAndroid Build Coastguard Worker * we cannot reuse the current scratch_rsrc temp because its definition is unreachable */
1489*61046927SAndroid Build Coastguard Worker if (block.linear_preds.empty())
1490*61046927SAndroid Build Coastguard Worker ctx.scratch_rsrc = Temp();
1491*61046927SAndroid Build Coastguard Worker }
1492*61046927SAndroid Build Coastguard Worker
1493*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>::iterator it;
1494*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>> instructions;
1495*61046927SAndroid Build Coastguard Worker instructions.reserve(block.instructions.size());
1496*61046927SAndroid Build Coastguard Worker Builder bld(ctx.program, &instructions);
1497*61046927SAndroid Build Coastguard Worker for (it = block.instructions.begin(); it != block.instructions.end(); ++it) {
1498*61046927SAndroid Build Coastguard Worker
1499*61046927SAndroid Build Coastguard Worker if ((*it)->opcode == aco_opcode::p_spill) {
1500*61046927SAndroid Build Coastguard Worker uint32_t spill_id = (*it)->operands[1].constantValue();
1501*61046927SAndroid Build Coastguard Worker
1502*61046927SAndroid Build Coastguard Worker if (!ctx.is_reloaded[spill_id]) {
1503*61046927SAndroid Build Coastguard Worker /* never reloaded, so don't spill */
1504*61046927SAndroid Build Coastguard Worker } else if (!is_assigned[spill_id]) {
1505*61046927SAndroid Build Coastguard Worker unreachable("No spill slot assigned for spill id");
1506*61046927SAndroid Build Coastguard Worker } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
1507*61046927SAndroid Build Coastguard Worker spill_vgpr(ctx, block, instructions, *it, slots);
1508*61046927SAndroid Build Coastguard Worker } else {
1509*61046927SAndroid Build Coastguard Worker ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
1510*61046927SAndroid Build Coastguard Worker
1511*61046927SAndroid Build Coastguard Worker uint32_t spill_slot = slots[spill_id];
1512*61046927SAndroid Build Coastguard Worker
1513*61046927SAndroid Build Coastguard Worker /* check if the linear vgpr already exists */
1514*61046927SAndroid Build Coastguard Worker if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
1515*61046927SAndroid Build Coastguard Worker Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
1516*61046927SAndroid Build Coastguard Worker vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
1517*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> create{
1518*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1519*61046927SAndroid Build Coastguard Worker create->definitions[0] = Definition(linear_vgpr);
1520*61046927SAndroid Build Coastguard Worker /* find the right place to insert this definition */
1521*61046927SAndroid Build Coastguard Worker if (last_top_level_block_idx == block.index) {
1522*61046927SAndroid Build Coastguard Worker /* insert right before the current instruction */
1523*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(create));
1524*61046927SAndroid Build Coastguard Worker } else {
1525*61046927SAndroid Build Coastguard Worker assert(last_top_level_block_idx < block.index);
1526*61046927SAndroid Build Coastguard Worker /* insert after p_logical_end of the last top-level block */
1527*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>& block_instrs =
1528*61046927SAndroid Build Coastguard Worker ctx.program->blocks[last_top_level_block_idx].instructions;
1529*61046927SAndroid Build Coastguard Worker auto insert_point =
1530*61046927SAndroid Build Coastguard Worker std::find_if(block_instrs.rbegin(), block_instrs.rend(),
1531*61046927SAndroid Build Coastguard Worker [](const auto& iter) {
1532*61046927SAndroid Build Coastguard Worker return iter->opcode == aco_opcode::p_logical_end;
1533*61046927SAndroid Build Coastguard Worker })
1534*61046927SAndroid Build Coastguard Worker .base();
1535*61046927SAndroid Build Coastguard Worker block_instrs.insert(insert_point, std::move(create));
1536*61046927SAndroid Build Coastguard Worker }
1537*61046927SAndroid Build Coastguard Worker }
1538*61046927SAndroid Build Coastguard Worker
1539*61046927SAndroid Build Coastguard Worker /* spill sgpr: just add the vgpr temp to operands */
1540*61046927SAndroid Build Coastguard Worker Instruction* spill = create_instruction(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
1541*61046927SAndroid Build Coastguard Worker spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
1542*61046927SAndroid Build Coastguard Worker spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
1543*61046927SAndroid Build Coastguard Worker spill->operands[2] = (*it)->operands[0];
1544*61046927SAndroid Build Coastguard Worker instructions.emplace_back(aco_ptr<Instruction>(spill));
1545*61046927SAndroid Build Coastguard Worker }
1546*61046927SAndroid Build Coastguard Worker
1547*61046927SAndroid Build Coastguard Worker } else if ((*it)->opcode == aco_opcode::p_reload) {
1548*61046927SAndroid Build Coastguard Worker uint32_t spill_id = (*it)->operands[0].constantValue();
1549*61046927SAndroid Build Coastguard Worker assert(ctx.is_reloaded[spill_id]);
1550*61046927SAndroid Build Coastguard Worker
1551*61046927SAndroid Build Coastguard Worker if (!is_assigned[spill_id]) {
1552*61046927SAndroid Build Coastguard Worker unreachable("No spill slot assigned for spill id");
1553*61046927SAndroid Build Coastguard Worker } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
1554*61046927SAndroid Build Coastguard Worker reload_vgpr(ctx, block, instructions, *it, slots);
1555*61046927SAndroid Build Coastguard Worker } else {
1556*61046927SAndroid Build Coastguard Worker uint32_t spill_slot = slots[spill_id];
1557*61046927SAndroid Build Coastguard Worker
1558*61046927SAndroid Build Coastguard Worker /* check if the linear vgpr already exists */
1559*61046927SAndroid Build Coastguard Worker if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
1560*61046927SAndroid Build Coastguard Worker Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
1561*61046927SAndroid Build Coastguard Worker vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
1562*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction> create{
1563*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1564*61046927SAndroid Build Coastguard Worker create->definitions[0] = Definition(linear_vgpr);
1565*61046927SAndroid Build Coastguard Worker /* find the right place to insert this definition */
1566*61046927SAndroid Build Coastguard Worker if (last_top_level_block_idx == block.index) {
1567*61046927SAndroid Build Coastguard Worker /* insert right before the current instruction */
1568*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(create));
1569*61046927SAndroid Build Coastguard Worker } else {
1570*61046927SAndroid Build Coastguard Worker assert(last_top_level_block_idx < block.index);
1571*61046927SAndroid Build Coastguard Worker /* insert after p_logical_end of the last top-level block */
1572*61046927SAndroid Build Coastguard Worker std::vector<aco_ptr<Instruction>>& block_instrs =
1573*61046927SAndroid Build Coastguard Worker ctx.program->blocks[last_top_level_block_idx].instructions;
1574*61046927SAndroid Build Coastguard Worker auto insert_point =
1575*61046927SAndroid Build Coastguard Worker std::find_if(block_instrs.rbegin(), block_instrs.rend(),
1576*61046927SAndroid Build Coastguard Worker [](const auto& iter) {
1577*61046927SAndroid Build Coastguard Worker return iter->opcode == aco_opcode::p_logical_end;
1578*61046927SAndroid Build Coastguard Worker })
1579*61046927SAndroid Build Coastguard Worker .base();
1580*61046927SAndroid Build Coastguard Worker block_instrs.insert(insert_point, std::move(create));
1581*61046927SAndroid Build Coastguard Worker }
1582*61046927SAndroid Build Coastguard Worker }
1583*61046927SAndroid Build Coastguard Worker
1584*61046927SAndroid Build Coastguard Worker /* reload sgpr: just add the vgpr temp to operands */
1585*61046927SAndroid Build Coastguard Worker Instruction* reload = create_instruction(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
1586*61046927SAndroid Build Coastguard Worker reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
1587*61046927SAndroid Build Coastguard Worker reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
1588*61046927SAndroid Build Coastguard Worker reload->definitions[0] = (*it)->definitions[0];
1589*61046927SAndroid Build Coastguard Worker instructions.emplace_back(aco_ptr<Instruction>(reload));
1590*61046927SAndroid Build Coastguard Worker }
1591*61046927SAndroid Build Coastguard Worker } else if (!ctx.unused_remats.count(it->get())) {
1592*61046927SAndroid Build Coastguard Worker instructions.emplace_back(std::move(*it));
1593*61046927SAndroid Build Coastguard Worker }
1594*61046927SAndroid Build Coastguard Worker }
1595*61046927SAndroid Build Coastguard Worker block.instructions = std::move(instructions);
1596*61046927SAndroid Build Coastguard Worker }
1597*61046927SAndroid Build Coastguard Worker
1598*61046927SAndroid Build Coastguard Worker /* update required scratch memory */
1599*61046927SAndroid Build Coastguard Worker ctx.program->config->scratch_bytes_per_wave += ctx.vgpr_spill_slots * 4 * ctx.program->wave_size;
1600*61046927SAndroid Build Coastguard Worker }
1601*61046927SAndroid Build Coastguard Worker
1602*61046927SAndroid Build Coastguard Worker } /* end namespace */
1603*61046927SAndroid Build Coastguard Worker
1604*61046927SAndroid Build Coastguard Worker void
spill(Program * program)1605*61046927SAndroid Build Coastguard Worker spill(Program* program)
1606*61046927SAndroid Build Coastguard Worker {
1607*61046927SAndroid Build Coastguard Worker program->config->spilled_vgprs = 0;
1608*61046927SAndroid Build Coastguard Worker program->config->spilled_sgprs = 0;
1609*61046927SAndroid Build Coastguard Worker
1610*61046927SAndroid Build Coastguard Worker program->progress = CompilationProgress::after_spilling;
1611*61046927SAndroid Build Coastguard Worker
1612*61046927SAndroid Build Coastguard Worker /* no spilling when register pressure is low enough */
1613*61046927SAndroid Build Coastguard Worker if (program->num_waves > 0)
1614*61046927SAndroid Build Coastguard Worker return;
1615*61046927SAndroid Build Coastguard Worker
1616*61046927SAndroid Build Coastguard Worker /* lower to CSSA before spilling to ensure correctness w.r.t. phis */
1617*61046927SAndroid Build Coastguard Worker lower_to_cssa(program);
1618*61046927SAndroid Build Coastguard Worker
1619*61046927SAndroid Build Coastguard Worker /* calculate target register demand */
1620*61046927SAndroid Build Coastguard Worker const RegisterDemand demand = program->max_reg_demand; /* current max */
1621*61046927SAndroid Build Coastguard Worker const uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
1622*61046927SAndroid Build Coastguard Worker const uint16_t vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
1623*61046927SAndroid Build Coastguard Worker uint16_t extra_vgprs = 0;
1624*61046927SAndroid Build Coastguard Worker uint16_t extra_sgprs = 0;
1625*61046927SAndroid Build Coastguard Worker
1626*61046927SAndroid Build Coastguard Worker /* calculate extra VGPRs required for spilling SGPRs */
1627*61046927SAndroid Build Coastguard Worker if (demand.sgpr > sgpr_limit) {
1628*61046927SAndroid Build Coastguard Worker unsigned sgpr_spills = demand.sgpr - sgpr_limit;
1629*61046927SAndroid Build Coastguard Worker extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
1630*61046927SAndroid Build Coastguard Worker }
1631*61046927SAndroid Build Coastguard Worker /* add extra SGPRs required for spilling VGPRs */
1632*61046927SAndroid Build Coastguard Worker if (demand.vgpr + extra_vgprs > vgpr_limit) {
1633*61046927SAndroid Build Coastguard Worker if (program->gfx_level >= GFX9)
1634*61046927SAndroid Build Coastguard Worker extra_sgprs = 1; /* SADDR */
1635*61046927SAndroid Build Coastguard Worker else
1636*61046927SAndroid Build Coastguard Worker extra_sgprs = 5; /* scratch_resource (s4) + scratch_offset (s1) */
1637*61046927SAndroid Build Coastguard Worker if (demand.sgpr + extra_sgprs > sgpr_limit) {
1638*61046927SAndroid Build Coastguard Worker /* re-calculate in case something has changed */
1639*61046927SAndroid Build Coastguard Worker unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit;
1640*61046927SAndroid Build Coastguard Worker extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
1641*61046927SAndroid Build Coastguard Worker }
1642*61046927SAndroid Build Coastguard Worker }
1643*61046927SAndroid Build Coastguard Worker /* the spiller has to target the following register demand */
1644*61046927SAndroid Build Coastguard Worker const RegisterDemand target(vgpr_limit - extra_vgprs, sgpr_limit - extra_sgprs);
1645*61046927SAndroid Build Coastguard Worker
1646*61046927SAndroid Build Coastguard Worker /* initialize ctx */
1647*61046927SAndroid Build Coastguard Worker spill_ctx ctx(target, program);
1648*61046927SAndroid Build Coastguard Worker gather_ssa_use_info(ctx);
1649*61046927SAndroid Build Coastguard Worker get_rematerialize_info(ctx);
1650*61046927SAndroid Build Coastguard Worker
1651*61046927SAndroid Build Coastguard Worker /* create spills and reloads */
1652*61046927SAndroid Build Coastguard Worker for (unsigned i = 0; i < program->blocks.size(); i++)
1653*61046927SAndroid Build Coastguard Worker spill_block(ctx, i);
1654*61046927SAndroid Build Coastguard Worker
1655*61046927SAndroid Build Coastguard Worker /* assign spill slots and DCE rematerialized code */
1656*61046927SAndroid Build Coastguard Worker assign_spill_slots(ctx, extra_vgprs);
1657*61046927SAndroid Build Coastguard Worker
1658*61046927SAndroid Build Coastguard Worker /* update live variable information */
1659*61046927SAndroid Build Coastguard Worker live_var_analysis(program);
1660*61046927SAndroid Build Coastguard Worker
1661*61046927SAndroid Build Coastguard Worker assert(program->num_waves > 0);
1662*61046927SAndroid Build Coastguard Worker }
1663*61046927SAndroid Build Coastguard Worker
1664*61046927SAndroid Build Coastguard Worker } // namespace aco
1665