xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_spill.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2018 Valve Corporation
3*61046927SAndroid Build Coastguard Worker  * Copyright © 2018 Google
4*61046927SAndroid Build Coastguard Worker  *
5*61046927SAndroid Build Coastguard Worker  * SPDX-License-Identifier: MIT
6*61046927SAndroid Build Coastguard Worker  */
7*61046927SAndroid Build Coastguard Worker 
8*61046927SAndroid Build Coastguard Worker #include "aco_builder.h"
9*61046927SAndroid Build Coastguard Worker #include "aco_ir.h"
10*61046927SAndroid Build Coastguard Worker #include "aco_util.h"
11*61046927SAndroid Build Coastguard Worker 
12*61046927SAndroid Build Coastguard Worker #include "common/ac_descriptors.h"
13*61046927SAndroid Build Coastguard Worker #include "common/sid.h"
14*61046927SAndroid Build Coastguard Worker 
15*61046927SAndroid Build Coastguard Worker #include <algorithm>
16*61046927SAndroid Build Coastguard Worker #include <cstring>
17*61046927SAndroid Build Coastguard Worker #include <map>
18*61046927SAndroid Build Coastguard Worker #include <set>
19*61046927SAndroid Build Coastguard Worker #include <unordered_map>
20*61046927SAndroid Build Coastguard Worker #include <unordered_set>
21*61046927SAndroid Build Coastguard Worker #include <vector>
22*61046927SAndroid Build Coastguard Worker 
23*61046927SAndroid Build Coastguard Worker namespace std {
24*61046927SAndroid Build Coastguard Worker template <> struct hash<aco::Temp> {
operator ()std::hash25*61046927SAndroid Build Coastguard Worker    size_t operator()(aco::Temp temp) const noexcept
26*61046927SAndroid Build Coastguard Worker    {
27*61046927SAndroid Build Coastguard Worker       uint32_t v;
28*61046927SAndroid Build Coastguard Worker       std::memcpy(&v, &temp, sizeof(temp));
29*61046927SAndroid Build Coastguard Worker       return std::hash<uint32_t>{}(v);
30*61046927SAndroid Build Coastguard Worker    }
31*61046927SAndroid Build Coastguard Worker };
32*61046927SAndroid Build Coastguard Worker } // namespace std
33*61046927SAndroid Build Coastguard Worker 
34*61046927SAndroid Build Coastguard Worker /*
35*61046927SAndroid Build Coastguard Worker  * Implements the spilling algorithm on SSA-form based on
36*61046927SAndroid Build Coastguard Worker  * "Register Spilling and Live-Range Splitting for SSA-Form Programs"
37*61046927SAndroid Build Coastguard Worker  * by Matthias Braun and Sebastian Hack.
38*61046927SAndroid Build Coastguard Worker  *
39*61046927SAndroid Build Coastguard Worker  * Key difference between this algorithm and the min-algorithm from the paper
40*61046927SAndroid Build Coastguard Worker  * is the use of average use distances rather than next-use distances per
41*61046927SAndroid Build Coastguard Worker  * instruction.
42*61046927SAndroid Build Coastguard Worker  * As we decrement the number of remaining uses, the average use distances
43*61046927SAndroid Build Coastguard Worker  * give an approximation of the next-use distances while being computationally
44*61046927SAndroid Build Coastguard Worker  * and memory-wise less expensive.
45*61046927SAndroid Build Coastguard Worker  */
46*61046927SAndroid Build Coastguard Worker 
47*61046927SAndroid Build Coastguard Worker namespace aco {
48*61046927SAndroid Build Coastguard Worker 
49*61046927SAndroid Build Coastguard Worker namespace {
50*61046927SAndroid Build Coastguard Worker 
51*61046927SAndroid Build Coastguard Worker struct remat_info {
52*61046927SAndroid Build Coastguard Worker    Instruction* instr;
53*61046927SAndroid Build Coastguard Worker };
54*61046927SAndroid Build Coastguard Worker 
55*61046927SAndroid Build Coastguard Worker struct loop_info {
56*61046927SAndroid Build Coastguard Worker    uint32_t index;
57*61046927SAndroid Build Coastguard Worker    aco::unordered_map<Temp, uint32_t> spills;
58*61046927SAndroid Build Coastguard Worker    IDSet live_in;
59*61046927SAndroid Build Coastguard Worker };
60*61046927SAndroid Build Coastguard Worker 
/* Use statistics of a single SSA value. */
struct use_info {
   /* Number of uses counted for the value. */
   uint32_t num_uses = 0;
   /* Highest instruction index at which the value was seen as an operand. */
   uint32_t last_use = 0;

   /*
    * Spill priority of the value: last_use divided by num_uses.
    *
    * The quotient is computed in integer arithmetic (and therefore truncated)
    * before it is converted to float; this is kept as-is so that spilling
    * decisions do not change.
    *
    * A value without any use yields 0 instead of dividing by zero, which is
    * undefined behavior for integers.
    */
   float score() const
   {
      if (num_uses == 0)
         return 0.0f;
      return static_cast<float>(last_use / num_uses);
   }
};
66*61046927SAndroid Build Coastguard Worker 
67*61046927SAndroid Build Coastguard Worker struct spill_ctx {
68*61046927SAndroid Build Coastguard Worker    RegisterDemand target_pressure;
69*61046927SAndroid Build Coastguard Worker    Program* program;
70*61046927SAndroid Build Coastguard Worker    aco::monotonic_buffer_resource memory;
71*61046927SAndroid Build Coastguard Worker 
72*61046927SAndroid Build Coastguard Worker    std::vector<aco::map<Temp, Temp>> renames;
73*61046927SAndroid Build Coastguard Worker    std::vector<aco::unordered_map<Temp, uint32_t>> spills_entry;
74*61046927SAndroid Build Coastguard Worker    std::vector<aco::unordered_map<Temp, uint32_t>> spills_exit;
75*61046927SAndroid Build Coastguard Worker 
76*61046927SAndroid Build Coastguard Worker    std::vector<bool> processed;
77*61046927SAndroid Build Coastguard Worker    std::vector<loop_info> loop;
78*61046927SAndroid Build Coastguard Worker 
79*61046927SAndroid Build Coastguard Worker    std::vector<use_info> ssa_infos;
80*61046927SAndroid Build Coastguard Worker    std::vector<std::pair<RegClass, std::unordered_set<uint32_t>>> interferences;
81*61046927SAndroid Build Coastguard Worker    std::vector<std::vector<uint32_t>> affinities;
82*61046927SAndroid Build Coastguard Worker    std::vector<bool> is_reloaded;
83*61046927SAndroid Build Coastguard Worker    aco::unordered_map<Temp, remat_info> remat;
84*61046927SAndroid Build Coastguard Worker    std::set<Instruction*> unused_remats;
85*61046927SAndroid Build Coastguard Worker    unsigned wave_size;
86*61046927SAndroid Build Coastguard Worker 
87*61046927SAndroid Build Coastguard Worker    unsigned sgpr_spill_slots;
88*61046927SAndroid Build Coastguard Worker    unsigned vgpr_spill_slots;
89*61046927SAndroid Build Coastguard Worker    Temp scratch_rsrc;
90*61046927SAndroid Build Coastguard Worker 
spill_ctxaco::__anon6dee26dc0111::spill_ctx91*61046927SAndroid Build Coastguard Worker    spill_ctx(const RegisterDemand target_pressure_, Program* program_)
92*61046927SAndroid Build Coastguard Worker        : target_pressure(target_pressure_), program(program_), memory(),
93*61046927SAndroid Build Coastguard Worker          renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
94*61046927SAndroid Build Coastguard Worker          spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
95*61046927SAndroid Build Coastguard Worker          spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
96*61046927SAndroid Build Coastguard Worker          processed(program->blocks.size(), false), ssa_infos(program->peekAllocationId()),
97*61046927SAndroid Build Coastguard Worker          remat(memory), wave_size(program->wave_size), sgpr_spill_slots(0), vgpr_spill_slots(0)
98*61046927SAndroid Build Coastguard Worker    {}
99*61046927SAndroid Build Coastguard Worker 
add_affinityaco::__anon6dee26dc0111::spill_ctx100*61046927SAndroid Build Coastguard Worker    void add_affinity(uint32_t first, uint32_t second)
101*61046927SAndroid Build Coastguard Worker    {
102*61046927SAndroid Build Coastguard Worker       unsigned found_first = affinities.size();
103*61046927SAndroid Build Coastguard Worker       unsigned found_second = affinities.size();
104*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < affinities.size(); i++) {
105*61046927SAndroid Build Coastguard Worker          std::vector<uint32_t>& vec = affinities[i];
106*61046927SAndroid Build Coastguard Worker          for (uint32_t entry : vec) {
107*61046927SAndroid Build Coastguard Worker             if (entry == first)
108*61046927SAndroid Build Coastguard Worker                found_first = i;
109*61046927SAndroid Build Coastguard Worker             else if (entry == second)
110*61046927SAndroid Build Coastguard Worker                found_second = i;
111*61046927SAndroid Build Coastguard Worker          }
112*61046927SAndroid Build Coastguard Worker       }
113*61046927SAndroid Build Coastguard Worker       if (found_first == affinities.size() && found_second == affinities.size()) {
114*61046927SAndroid Build Coastguard Worker          affinities.emplace_back(std::vector<uint32_t>({first, second}));
115*61046927SAndroid Build Coastguard Worker       } else if (found_first < affinities.size() && found_second == affinities.size()) {
116*61046927SAndroid Build Coastguard Worker          affinities[found_first].push_back(second);
117*61046927SAndroid Build Coastguard Worker       } else if (found_second < affinities.size() && found_first == affinities.size()) {
118*61046927SAndroid Build Coastguard Worker          affinities[found_second].push_back(first);
119*61046927SAndroid Build Coastguard Worker       } else if (found_first != found_second) {
120*61046927SAndroid Build Coastguard Worker          /* merge second into first */
121*61046927SAndroid Build Coastguard Worker          affinities[found_first].insert(affinities[found_first].end(),
122*61046927SAndroid Build Coastguard Worker                                         affinities[found_second].begin(),
123*61046927SAndroid Build Coastguard Worker                                         affinities[found_second].end());
124*61046927SAndroid Build Coastguard Worker          affinities.erase(std::next(affinities.begin(), found_second));
125*61046927SAndroid Build Coastguard Worker       } else {
126*61046927SAndroid Build Coastguard Worker          assert(found_first == found_second);
127*61046927SAndroid Build Coastguard Worker       }
128*61046927SAndroid Build Coastguard Worker    }
129*61046927SAndroid Build Coastguard Worker 
add_to_spillsaco::__anon6dee26dc0111::spill_ctx130*61046927SAndroid Build Coastguard Worker    uint32_t add_to_spills(Temp to_spill, aco::unordered_map<Temp, uint32_t>& spills)
131*61046927SAndroid Build Coastguard Worker    {
132*61046927SAndroid Build Coastguard Worker       const uint32_t spill_id = allocate_spill_id(to_spill.regClass());
133*61046927SAndroid Build Coastguard Worker       for (auto pair : spills)
134*61046927SAndroid Build Coastguard Worker          add_interference(spill_id, pair.second);
135*61046927SAndroid Build Coastguard Worker       if (!loop.empty()) {
136*61046927SAndroid Build Coastguard Worker          for (auto pair : loop.back().spills)
137*61046927SAndroid Build Coastguard Worker             add_interference(spill_id, pair.second);
138*61046927SAndroid Build Coastguard Worker       }
139*61046927SAndroid Build Coastguard Worker 
140*61046927SAndroid Build Coastguard Worker       spills[to_spill] = spill_id;
141*61046927SAndroid Build Coastguard Worker       return spill_id;
142*61046927SAndroid Build Coastguard Worker    }
143*61046927SAndroid Build Coastguard Worker 
add_interferenceaco::__anon6dee26dc0111::spill_ctx144*61046927SAndroid Build Coastguard Worker    void add_interference(uint32_t first, uint32_t second)
145*61046927SAndroid Build Coastguard Worker    {
146*61046927SAndroid Build Coastguard Worker       if (interferences[first].first.type() != interferences[second].first.type())
147*61046927SAndroid Build Coastguard Worker          return;
148*61046927SAndroid Build Coastguard Worker 
149*61046927SAndroid Build Coastguard Worker       bool inserted = interferences[first].second.insert(second).second;
150*61046927SAndroid Build Coastguard Worker       if (inserted)
151*61046927SAndroid Build Coastguard Worker          interferences[second].second.insert(first);
152*61046927SAndroid Build Coastguard Worker    }
153*61046927SAndroid Build Coastguard Worker 
allocate_spill_idaco::__anon6dee26dc0111::spill_ctx154*61046927SAndroid Build Coastguard Worker    uint32_t allocate_spill_id(RegClass rc)
155*61046927SAndroid Build Coastguard Worker    {
156*61046927SAndroid Build Coastguard Worker       interferences.emplace_back(rc, std::unordered_set<uint32_t>());
157*61046927SAndroid Build Coastguard Worker       is_reloaded.push_back(false);
158*61046927SAndroid Build Coastguard Worker       return next_spill_id++;
159*61046927SAndroid Build Coastguard Worker    }
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker    uint32_t next_spill_id = 0;
162*61046927SAndroid Build Coastguard Worker };
163*61046927SAndroid Build Coastguard Worker 
164*61046927SAndroid Build Coastguard Worker /**
165*61046927SAndroid Build Coastguard Worker  * Gathers information about the number of uses and point of last use
166*61046927SAndroid Build Coastguard Worker  * per SSA value.
167*61046927SAndroid Build Coastguard Worker  *
168*61046927SAndroid Build Coastguard Worker  * Phi definitions are added to live-ins.
169*61046927SAndroid Build Coastguard Worker  */
170*61046927SAndroid Build Coastguard Worker void
gather_ssa_use_info(spill_ctx & ctx)171*61046927SAndroid Build Coastguard Worker gather_ssa_use_info(spill_ctx& ctx)
172*61046927SAndroid Build Coastguard Worker {
173*61046927SAndroid Build Coastguard Worker    unsigned instruction_idx = 0;
174*61046927SAndroid Build Coastguard Worker    for (Block& block : ctx.program->blocks) {
175*61046927SAndroid Build Coastguard Worker       for (int i = block.instructions.size() - 1; i >= 0; i--) {
176*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction>& instr = block.instructions[i];
177*61046927SAndroid Build Coastguard Worker          for (const Operand& op : instr->operands) {
178*61046927SAndroid Build Coastguard Worker             if (op.isTemp()) {
179*61046927SAndroid Build Coastguard Worker                use_info& info = ctx.ssa_infos[op.tempId()];
180*61046927SAndroid Build Coastguard Worker                info.num_uses++;
181*61046927SAndroid Build Coastguard Worker                info.last_use = std::max(info.last_use, instruction_idx + i);
182*61046927SAndroid Build Coastguard Worker             }
183*61046927SAndroid Build Coastguard Worker          }
184*61046927SAndroid Build Coastguard Worker       }
185*61046927SAndroid Build Coastguard Worker 
186*61046927SAndroid Build Coastguard Worker       /* All live-in variables at loop headers get an additional artificial use.
187*61046927SAndroid Build Coastguard Worker        * As we decrement the number of uses while processing the blocks, this
188*61046927SAndroid Build Coastguard Worker        * ensures that the number of uses won't becomes zero before the loop
189*61046927SAndroid Build Coastguard Worker        * (and the variables' live-ranges) end.
190*61046927SAndroid Build Coastguard Worker        */
191*61046927SAndroid Build Coastguard Worker       if (block.kind & block_kind_loop_header) {
192*61046927SAndroid Build Coastguard Worker          for (unsigned t : ctx.program->live.live_in[block.index])
193*61046927SAndroid Build Coastguard Worker             ctx.ssa_infos[t].num_uses++;
194*61046927SAndroid Build Coastguard Worker       }
195*61046927SAndroid Build Coastguard Worker 
196*61046927SAndroid Build Coastguard Worker       instruction_idx += block.instructions.size();
197*61046927SAndroid Build Coastguard Worker    }
198*61046927SAndroid Build Coastguard Worker }
199*61046927SAndroid Build Coastguard Worker 
200*61046927SAndroid Build Coastguard Worker bool
should_rematerialize(aco_ptr<Instruction> & instr)201*61046927SAndroid Build Coastguard Worker should_rematerialize(aco_ptr<Instruction>& instr)
202*61046927SAndroid Build Coastguard Worker {
203*61046927SAndroid Build Coastguard Worker    /* TODO: rematerialization is only supported for VOP1, SOP1 and PSEUDO */
204*61046927SAndroid Build Coastguard Worker    if (instr->format != Format::VOP1 && instr->format != Format::SOP1 &&
205*61046927SAndroid Build Coastguard Worker        instr->format != Format::PSEUDO && instr->format != Format::SOPK)
206*61046927SAndroid Build Coastguard Worker       return false;
207*61046927SAndroid Build Coastguard Worker    /* TODO: pseudo-instruction rematerialization is only supported for
208*61046927SAndroid Build Coastguard Worker     * p_create_vector/p_parallelcopy */
209*61046927SAndroid Build Coastguard Worker    if (instr->isPseudo() && instr->opcode != aco_opcode::p_create_vector &&
210*61046927SAndroid Build Coastguard Worker        instr->opcode != aco_opcode::p_parallelcopy)
211*61046927SAndroid Build Coastguard Worker       return false;
212*61046927SAndroid Build Coastguard Worker    if (instr->isSOPK() && instr->opcode != aco_opcode::s_movk_i32)
213*61046927SAndroid Build Coastguard Worker       return false;
214*61046927SAndroid Build Coastguard Worker 
215*61046927SAndroid Build Coastguard Worker    for (const Operand& op : instr->operands) {
216*61046927SAndroid Build Coastguard Worker       /* TODO: rematerialization using temporaries isn't yet supported */
217*61046927SAndroid Build Coastguard Worker       if (!op.isConstant())
218*61046927SAndroid Build Coastguard Worker          return false;
219*61046927SAndroid Build Coastguard Worker    }
220*61046927SAndroid Build Coastguard Worker 
221*61046927SAndroid Build Coastguard Worker    /* TODO: rematerialization with multiple definitions isn't yet supported */
222*61046927SAndroid Build Coastguard Worker    if (instr->definitions.size() > 1)
223*61046927SAndroid Build Coastguard Worker       return false;
224*61046927SAndroid Build Coastguard Worker 
225*61046927SAndroid Build Coastguard Worker    return true;
226*61046927SAndroid Build Coastguard Worker }
227*61046927SAndroid Build Coastguard Worker 
228*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>
do_reload(spill_ctx & ctx,Temp tmp,Temp new_name,uint32_t spill_id)229*61046927SAndroid Build Coastguard Worker do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
230*61046927SAndroid Build Coastguard Worker {
231*61046927SAndroid Build Coastguard Worker    std::unordered_map<Temp, remat_info>::iterator remat = ctx.remat.find(tmp);
232*61046927SAndroid Build Coastguard Worker    if (remat != ctx.remat.end()) {
233*61046927SAndroid Build Coastguard Worker       Instruction* instr = remat->second.instr;
234*61046927SAndroid Build Coastguard Worker       assert((instr->isVOP1() || instr->isSOP1() || instr->isPseudo() || instr->isSOPK()) &&
235*61046927SAndroid Build Coastguard Worker              "unsupported");
236*61046927SAndroid Build Coastguard Worker       assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector ||
237*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::p_parallelcopy) &&
238*61046927SAndroid Build Coastguard Worker              "unsupported");
239*61046927SAndroid Build Coastguard Worker       assert(instr->definitions.size() == 1 && "unsupported");
240*61046927SAndroid Build Coastguard Worker 
241*61046927SAndroid Build Coastguard Worker       aco_ptr<Instruction> res;
242*61046927SAndroid Build Coastguard Worker       res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(),
243*61046927SAndroid Build Coastguard Worker                                    instr->definitions.size()));
244*61046927SAndroid Build Coastguard Worker       if (instr->isSOPK())
245*61046927SAndroid Build Coastguard Worker          res->salu().imm = instr->salu().imm;
246*61046927SAndroid Build Coastguard Worker 
247*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < instr->operands.size(); i++) {
248*61046927SAndroid Build Coastguard Worker          res->operands[i] = instr->operands[i];
249*61046927SAndroid Build Coastguard Worker          if (instr->operands[i].isTemp()) {
250*61046927SAndroid Build Coastguard Worker             assert(false && "unsupported");
251*61046927SAndroid Build Coastguard Worker             if (ctx.remat.count(instr->operands[i].getTemp()))
252*61046927SAndroid Build Coastguard Worker                ctx.unused_remats.erase(ctx.remat[instr->operands[i].getTemp()].instr);
253*61046927SAndroid Build Coastguard Worker          }
254*61046927SAndroid Build Coastguard Worker       }
255*61046927SAndroid Build Coastguard Worker       res->definitions[0] = Definition(new_name);
256*61046927SAndroid Build Coastguard Worker       return res;
257*61046927SAndroid Build Coastguard Worker    } else {
258*61046927SAndroid Build Coastguard Worker       aco_ptr<Instruction> reload{create_instruction(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
259*61046927SAndroid Build Coastguard Worker       reload->operands[0] = Operand::c32(spill_id);
260*61046927SAndroid Build Coastguard Worker       reload->definitions[0] = Definition(new_name);
261*61046927SAndroid Build Coastguard Worker       ctx.is_reloaded[spill_id] = true;
262*61046927SAndroid Build Coastguard Worker       return reload;
263*61046927SAndroid Build Coastguard Worker    }
264*61046927SAndroid Build Coastguard Worker }
265*61046927SAndroid Build Coastguard Worker 
266*61046927SAndroid Build Coastguard Worker void
get_rematerialize_info(spill_ctx & ctx)267*61046927SAndroid Build Coastguard Worker get_rematerialize_info(spill_ctx& ctx)
268*61046927SAndroid Build Coastguard Worker {
269*61046927SAndroid Build Coastguard Worker    for (Block& block : ctx.program->blocks) {
270*61046927SAndroid Build Coastguard Worker       bool logical = false;
271*61046927SAndroid Build Coastguard Worker       for (aco_ptr<Instruction>& instr : block.instructions) {
272*61046927SAndroid Build Coastguard Worker          if (instr->opcode == aco_opcode::p_logical_start)
273*61046927SAndroid Build Coastguard Worker             logical = true;
274*61046927SAndroid Build Coastguard Worker          else if (instr->opcode == aco_opcode::p_logical_end)
275*61046927SAndroid Build Coastguard Worker             logical = false;
276*61046927SAndroid Build Coastguard Worker          if (logical && should_rematerialize(instr)) {
277*61046927SAndroid Build Coastguard Worker             for (const Definition& def : instr->definitions) {
278*61046927SAndroid Build Coastguard Worker                if (def.isTemp()) {
279*61046927SAndroid Build Coastguard Worker                   ctx.remat[def.getTemp()] = remat_info{instr.get()};
280*61046927SAndroid Build Coastguard Worker                   ctx.unused_remats.insert(instr.get());
281*61046927SAndroid Build Coastguard Worker                }
282*61046927SAndroid Build Coastguard Worker             }
283*61046927SAndroid Build Coastguard Worker          }
284*61046927SAndroid Build Coastguard Worker       }
285*61046927SAndroid Build Coastguard Worker    }
286*61046927SAndroid Build Coastguard Worker }
287*61046927SAndroid Build Coastguard Worker 
288*61046927SAndroid Build Coastguard Worker RegisterDemand
init_live_in_vars(spill_ctx & ctx,Block * block,unsigned block_idx)289*61046927SAndroid Build Coastguard Worker init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx)
290*61046927SAndroid Build Coastguard Worker {
291*61046927SAndroid Build Coastguard Worker    RegisterDemand spilled_registers;
292*61046927SAndroid Build Coastguard Worker 
293*61046927SAndroid Build Coastguard Worker    /* first block, nothing was spilled before */
294*61046927SAndroid Build Coastguard Worker    if (block->linear_preds.empty())
295*61046927SAndroid Build Coastguard Worker       return {0, 0};
296*61046927SAndroid Build Coastguard Worker 
297*61046927SAndroid Build Coastguard Worker    /* live-in variables at the beginning of the current block */
298*61046927SAndroid Build Coastguard Worker    const IDSet& live_in = ctx.program->live.live_in[block_idx];
299*61046927SAndroid Build Coastguard Worker 
300*61046927SAndroid Build Coastguard Worker    /* loop header block */
301*61046927SAndroid Build Coastguard Worker    if (block->kind & block_kind_loop_header) {
302*61046927SAndroid Build Coastguard Worker       assert(block->linear_preds[0] == block_idx - 1);
303*61046927SAndroid Build Coastguard Worker       assert(block->logical_preds[0] == block_idx - 1);
304*61046927SAndroid Build Coastguard Worker 
305*61046927SAndroid Build Coastguard Worker       /* check how many live-through variables should be spilled */
306*61046927SAndroid Build Coastguard Worker       RegisterDemand reg_pressure = block->live_in_demand;
307*61046927SAndroid Build Coastguard Worker       RegisterDemand loop_demand = reg_pressure;
308*61046927SAndroid Build Coastguard Worker       unsigned i = block_idx;
309*61046927SAndroid Build Coastguard Worker       while (ctx.program->blocks[i].loop_nest_depth >= block->loop_nest_depth)
310*61046927SAndroid Build Coastguard Worker          loop_demand.update(ctx.program->blocks[i++].register_demand);
311*61046927SAndroid Build Coastguard Worker 
312*61046927SAndroid Build Coastguard Worker       for (auto spilled : ctx.spills_exit[block_idx - 1]) {
313*61046927SAndroid Build Coastguard Worker          /* variable is not live at loop entry: probably a phi operand */
314*61046927SAndroid Build Coastguard Worker          if (!live_in.count(spilled.first.id()))
315*61046927SAndroid Build Coastguard Worker             continue;
316*61046927SAndroid Build Coastguard Worker 
317*61046927SAndroid Build Coastguard Worker          /* keep live-through variables spilled */
318*61046927SAndroid Build Coastguard Worker          ctx.spills_entry[block_idx][spilled.first] = spilled.second;
319*61046927SAndroid Build Coastguard Worker          spilled_registers += spilled.first;
320*61046927SAndroid Build Coastguard Worker          loop_demand -= spilled.first;
321*61046927SAndroid Build Coastguard Worker       }
322*61046927SAndroid Build Coastguard Worker       if (!ctx.loop.empty()) {
323*61046927SAndroid Build Coastguard Worker          /* If this is a nested loop, keep variables from the outer loop spilled. */
324*61046927SAndroid Build Coastguard Worker          for (auto spilled : ctx.loop.back().spills) {
325*61046927SAndroid Build Coastguard Worker             /* If the inner loop comes after the last continue statement of the outer loop,
326*61046927SAndroid Build Coastguard Worker              * the loop-carried variables might not be live-in for the inner loop.
327*61046927SAndroid Build Coastguard Worker              */
328*61046927SAndroid Build Coastguard Worker             if (live_in.count(spilled.first.id()) &&
329*61046927SAndroid Build Coastguard Worker                 ctx.spills_entry[block_idx].insert(spilled).second) {
330*61046927SAndroid Build Coastguard Worker                spilled_registers += spilled.first;
331*61046927SAndroid Build Coastguard Worker                loop_demand -= spilled.first;
332*61046927SAndroid Build Coastguard Worker             }
333*61046927SAndroid Build Coastguard Worker          }
334*61046927SAndroid Build Coastguard Worker       }
335*61046927SAndroid Build Coastguard Worker 
336*61046927SAndroid Build Coastguard Worker       /* select more live-through variables and constants */
337*61046927SAndroid Build Coastguard Worker       RegType type = RegType::vgpr;
338*61046927SAndroid Build Coastguard Worker       while (loop_demand.exceeds(ctx.target_pressure)) {
339*61046927SAndroid Build Coastguard Worker          /* if VGPR demand is low enough, select SGPRs */
340*61046927SAndroid Build Coastguard Worker          if (type == RegType::vgpr && loop_demand.vgpr <= ctx.target_pressure.vgpr)
341*61046927SAndroid Build Coastguard Worker             type = RegType::sgpr;
342*61046927SAndroid Build Coastguard Worker          /* if SGPR demand is low enough, break */
343*61046927SAndroid Build Coastguard Worker          if (type == RegType::sgpr && loop_demand.sgpr <= ctx.target_pressure.sgpr)
344*61046927SAndroid Build Coastguard Worker             break;
345*61046927SAndroid Build Coastguard Worker 
346*61046927SAndroid Build Coastguard Worker          float score = 0.0;
347*61046927SAndroid Build Coastguard Worker          unsigned remat = 0;
348*61046927SAndroid Build Coastguard Worker          Temp to_spill;
349*61046927SAndroid Build Coastguard Worker          for (unsigned t : live_in) {
350*61046927SAndroid Build Coastguard Worker             Temp var = Temp(t, ctx.program->temp_rc[t]);
351*61046927SAndroid Build Coastguard Worker             if (var.type() != type || ctx.spills_entry[block_idx].count(var) ||
352*61046927SAndroid Build Coastguard Worker                 var.regClass().is_linear_vgpr())
353*61046927SAndroid Build Coastguard Worker                continue;
354*61046927SAndroid Build Coastguard Worker 
355*61046927SAndroid Build Coastguard Worker             unsigned can_remat = ctx.remat.count(var);
356*61046927SAndroid Build Coastguard Worker             if (can_remat > remat || (can_remat == remat && ctx.ssa_infos[t].score() > score)) {
357*61046927SAndroid Build Coastguard Worker                to_spill = var;
358*61046927SAndroid Build Coastguard Worker                score = ctx.ssa_infos[t].score();
359*61046927SAndroid Build Coastguard Worker                remat = can_remat;
360*61046927SAndroid Build Coastguard Worker             }
361*61046927SAndroid Build Coastguard Worker          }
362*61046927SAndroid Build Coastguard Worker 
363*61046927SAndroid Build Coastguard Worker          /* select SGPRs or break */
364*61046927SAndroid Build Coastguard Worker          if (score == 0.0) {
365*61046927SAndroid Build Coastguard Worker             if (type == RegType::sgpr)
366*61046927SAndroid Build Coastguard Worker                break;
367*61046927SAndroid Build Coastguard Worker             type = RegType::sgpr;
368*61046927SAndroid Build Coastguard Worker             continue;
369*61046927SAndroid Build Coastguard Worker          }
370*61046927SAndroid Build Coastguard Worker 
371*61046927SAndroid Build Coastguard Worker          ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
372*61046927SAndroid Build Coastguard Worker          spilled_registers += to_spill;
373*61046927SAndroid Build Coastguard Worker          loop_demand -= to_spill;
374*61046927SAndroid Build Coastguard Worker       }
375*61046927SAndroid Build Coastguard Worker 
376*61046927SAndroid Build Coastguard Worker       /* create new loop_info */
377*61046927SAndroid Build Coastguard Worker       loop_info info = {block_idx, ctx.spills_entry[block_idx], live_in};
378*61046927SAndroid Build Coastguard Worker       ctx.loop.emplace_back(std::move(info));
379*61046927SAndroid Build Coastguard Worker 
380*61046927SAndroid Build Coastguard Worker       /* shortcut */
381*61046927SAndroid Build Coastguard Worker       if (!loop_demand.exceeds(ctx.target_pressure))
382*61046927SAndroid Build Coastguard Worker          return spilled_registers;
383*61046927SAndroid Build Coastguard Worker 
384*61046927SAndroid Build Coastguard Worker       /* if reg pressure is too high at beginning of loop, add variables with furthest use */
385*61046927SAndroid Build Coastguard Worker       reg_pressure -= spilled_registers;
386*61046927SAndroid Build Coastguard Worker 
387*61046927SAndroid Build Coastguard Worker       while (reg_pressure.exceeds(ctx.target_pressure)) {
388*61046927SAndroid Build Coastguard Worker          float score = 0;
389*61046927SAndroid Build Coastguard Worker          Temp to_spill;
390*61046927SAndroid Build Coastguard Worker          type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr;
391*61046927SAndroid Build Coastguard Worker          for (aco_ptr<Instruction>& phi : block->instructions) {
392*61046927SAndroid Build Coastguard Worker             if (!is_phi(phi))
393*61046927SAndroid Build Coastguard Worker                break;
394*61046927SAndroid Build Coastguard Worker             if (!phi->definitions[0].isTemp() || phi->definitions[0].isKill())
395*61046927SAndroid Build Coastguard Worker                continue;
396*61046927SAndroid Build Coastguard Worker             Temp var = phi->definitions[0].getTemp();
397*61046927SAndroid Build Coastguard Worker             if (var.type() == type && !ctx.spills_entry[block_idx].count(var) &&
398*61046927SAndroid Build Coastguard Worker                 ctx.ssa_infos[var.id()].score() > score) {
399*61046927SAndroid Build Coastguard Worker                to_spill = var;
400*61046927SAndroid Build Coastguard Worker                score = ctx.ssa_infos[var.id()].score();
401*61046927SAndroid Build Coastguard Worker             }
402*61046927SAndroid Build Coastguard Worker          }
403*61046927SAndroid Build Coastguard Worker          assert(score != 0.0);
404*61046927SAndroid Build Coastguard Worker          ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
405*61046927SAndroid Build Coastguard Worker          spilled_registers += to_spill;
406*61046927SAndroid Build Coastguard Worker          reg_pressure -= to_spill;
407*61046927SAndroid Build Coastguard Worker       }
408*61046927SAndroid Build Coastguard Worker 
409*61046927SAndroid Build Coastguard Worker       return spilled_registers;
410*61046927SAndroid Build Coastguard Worker    }
411*61046927SAndroid Build Coastguard Worker 
412*61046927SAndroid Build Coastguard Worker    /* branch block */
413*61046927SAndroid Build Coastguard Worker    if (block->linear_preds.size() == 1 && !(block->kind & block_kind_loop_exit)) {
414*61046927SAndroid Build Coastguard Worker       /* keep variables spilled */
415*61046927SAndroid Build Coastguard Worker       unsigned pred_idx = block->linear_preds[0];
416*61046927SAndroid Build Coastguard Worker       for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
417*61046927SAndroid Build Coastguard Worker          if (pair.first.type() != RegType::sgpr)
418*61046927SAndroid Build Coastguard Worker             continue;
419*61046927SAndroid Build Coastguard Worker 
420*61046927SAndroid Build Coastguard Worker          if (live_in.count(pair.first.id())) {
421*61046927SAndroid Build Coastguard Worker             spilled_registers += pair.first;
422*61046927SAndroid Build Coastguard Worker             ctx.spills_entry[block_idx].emplace(pair);
423*61046927SAndroid Build Coastguard Worker          }
424*61046927SAndroid Build Coastguard Worker       }
425*61046927SAndroid Build Coastguard Worker 
426*61046927SAndroid Build Coastguard Worker       if (block->logical_preds.empty())
427*61046927SAndroid Build Coastguard Worker          return spilled_registers;
428*61046927SAndroid Build Coastguard Worker 
429*61046927SAndroid Build Coastguard Worker       pred_idx = block->logical_preds[0];
430*61046927SAndroid Build Coastguard Worker       for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx]) {
431*61046927SAndroid Build Coastguard Worker          if (pair.first.type() != RegType::vgpr)
432*61046927SAndroid Build Coastguard Worker             continue;
433*61046927SAndroid Build Coastguard Worker 
434*61046927SAndroid Build Coastguard Worker          if (live_in.count(pair.first.id())) {
435*61046927SAndroid Build Coastguard Worker             spilled_registers += pair.first;
436*61046927SAndroid Build Coastguard Worker             ctx.spills_entry[block_idx].emplace(pair);
437*61046927SAndroid Build Coastguard Worker          }
438*61046927SAndroid Build Coastguard Worker       }
439*61046927SAndroid Build Coastguard Worker 
440*61046927SAndroid Build Coastguard Worker       return spilled_registers;
441*61046927SAndroid Build Coastguard Worker    }
442*61046927SAndroid Build Coastguard Worker 
443*61046927SAndroid Build Coastguard Worker    /* else: merge block */
444*61046927SAndroid Build Coastguard Worker    std::map<Temp, bool> partial_spills;
445*61046927SAndroid Build Coastguard Worker 
446*61046927SAndroid Build Coastguard Worker    /* keep variables spilled on all incoming paths */
447*61046927SAndroid Build Coastguard Worker    for (unsigned t : live_in) {
448*61046927SAndroid Build Coastguard Worker       const RegClass rc = ctx.program->temp_rc[t];
449*61046927SAndroid Build Coastguard Worker       Temp var = Temp(t, rc);
450*61046927SAndroid Build Coastguard Worker       Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
451*61046927SAndroid Build Coastguard Worker 
452*61046927SAndroid Build Coastguard Worker       /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload
453*61046927SAndroid Build Coastguard Worker        * it. Otherwise, if any predecessor reloads it, ensure it's reloaded on all other
454*61046927SAndroid Build Coastguard Worker        * predecessors. The idea is that it's better in practice to rematerialize redundantly than to
455*61046927SAndroid Build Coastguard Worker        * create lots of phis. */
456*61046927SAndroid Build Coastguard Worker       const bool remat = ctx.remat.count(var);
457*61046927SAndroid Build Coastguard Worker       /* If the variable is spilled at the current loop-header, spilling is essentially for free
458*61046927SAndroid Build Coastguard Worker        * while reloading is not. Thus, keep them spilled if they are at least partially spilled.
459*61046927SAndroid Build Coastguard Worker        */
460*61046927SAndroid Build Coastguard Worker       const bool avoid_respill = block->loop_nest_depth && ctx.loop.back().spills.count(var);
461*61046927SAndroid Build Coastguard Worker       bool spill = true;
462*61046927SAndroid Build Coastguard Worker       bool partial_spill = false;
463*61046927SAndroid Build Coastguard Worker       uint32_t spill_id = 0;
464*61046927SAndroid Build Coastguard Worker       for (unsigned pred_idx : preds) {
465*61046927SAndroid Build Coastguard Worker          if (!ctx.spills_exit[pred_idx].count(var)) {
466*61046927SAndroid Build Coastguard Worker             spill = false;
467*61046927SAndroid Build Coastguard Worker          } else {
468*61046927SAndroid Build Coastguard Worker             partial_spill = true;
469*61046927SAndroid Build Coastguard Worker             /* it might be that on one incoming path, the variable has a different spill_id, but
470*61046927SAndroid Build Coastguard Worker              * add_coupling_code() will take care of that. */
471*61046927SAndroid Build Coastguard Worker             spill_id = ctx.spills_exit[pred_idx][var];
472*61046927SAndroid Build Coastguard Worker          }
473*61046927SAndroid Build Coastguard Worker       }
474*61046927SAndroid Build Coastguard Worker       spill |= (remat && partial_spill);
475*61046927SAndroid Build Coastguard Worker       spill |= (avoid_respill && partial_spill);
476*61046927SAndroid Build Coastguard Worker       if (spill) {
477*61046927SAndroid Build Coastguard Worker          ctx.spills_entry[block_idx][var] = spill_id;
478*61046927SAndroid Build Coastguard Worker          partial_spills.erase(var);
479*61046927SAndroid Build Coastguard Worker          spilled_registers += var;
480*61046927SAndroid Build Coastguard Worker       } else {
481*61046927SAndroid Build Coastguard Worker          partial_spills[var] = partial_spill;
482*61046927SAndroid Build Coastguard Worker       }
483*61046927SAndroid Build Coastguard Worker    }
484*61046927SAndroid Build Coastguard Worker 
485*61046927SAndroid Build Coastguard Worker    /* same for phis */
486*61046927SAndroid Build Coastguard Worker    for (aco_ptr<Instruction>& phi : block->instructions) {
487*61046927SAndroid Build Coastguard Worker       if (!is_phi(phi))
488*61046927SAndroid Build Coastguard Worker          break;
489*61046927SAndroid Build Coastguard Worker       if (!phi->definitions[0].isTemp() || phi->definitions[0].isKill())
490*61046927SAndroid Build Coastguard Worker          continue;
491*61046927SAndroid Build Coastguard Worker 
492*61046927SAndroid Build Coastguard Worker       Block::edge_vec& preds =
493*61046927SAndroid Build Coastguard Worker          phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
494*61046927SAndroid Build Coastguard Worker       bool is_all_undef = true;
495*61046927SAndroid Build Coastguard Worker       bool is_all_spilled = true;
496*61046927SAndroid Build Coastguard Worker       bool is_partial_spill = false;
497*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < phi->operands.size(); i++) {
498*61046927SAndroid Build Coastguard Worker          if (phi->operands[i].isUndefined())
499*61046927SAndroid Build Coastguard Worker             continue;
500*61046927SAndroid Build Coastguard Worker          bool spilled = phi->operands[i].isTemp() &&
501*61046927SAndroid Build Coastguard Worker                         ctx.spills_exit[preds[i]].count(phi->operands[i].getTemp());
502*61046927SAndroid Build Coastguard Worker          is_all_spilled &= spilled;
503*61046927SAndroid Build Coastguard Worker          is_partial_spill |= spilled;
504*61046927SAndroid Build Coastguard Worker          is_all_undef = false;
505*61046927SAndroid Build Coastguard Worker       }
506*61046927SAndroid Build Coastguard Worker 
507*61046927SAndroid Build Coastguard Worker       if (is_all_spilled && !is_all_undef) {
508*61046927SAndroid Build Coastguard Worker          /* The phi is spilled at all predecessors. Keep it spilled. */
509*61046927SAndroid Build Coastguard Worker          ctx.add_to_spills(phi->definitions[0].getTemp(), ctx.spills_entry[block_idx]);
510*61046927SAndroid Build Coastguard Worker          spilled_registers += phi->definitions[0].getTemp();
511*61046927SAndroid Build Coastguard Worker          partial_spills.erase(phi->definitions[0].getTemp());
512*61046927SAndroid Build Coastguard Worker       } else {
513*61046927SAndroid Build Coastguard Worker          /* Phis might increase the register pressure. */
514*61046927SAndroid Build Coastguard Worker          partial_spills[phi->definitions[0].getTemp()] = is_partial_spill;
515*61046927SAndroid Build Coastguard Worker       }
516*61046927SAndroid Build Coastguard Worker    }
517*61046927SAndroid Build Coastguard Worker 
518*61046927SAndroid Build Coastguard Worker    /* if reg pressure at first instruction is still too high, add partially spilled variables */
519*61046927SAndroid Build Coastguard Worker    RegisterDemand reg_pressure = block->live_in_demand;
520*61046927SAndroid Build Coastguard Worker    reg_pressure -= spilled_registers;
521*61046927SAndroid Build Coastguard Worker 
522*61046927SAndroid Build Coastguard Worker    while (reg_pressure.exceeds(ctx.target_pressure)) {
523*61046927SAndroid Build Coastguard Worker       assert(!partial_spills.empty());
524*61046927SAndroid Build Coastguard Worker       std::map<Temp, bool>::iterator it = partial_spills.begin();
525*61046927SAndroid Build Coastguard Worker       Temp to_spill = Temp();
526*61046927SAndroid Build Coastguard Worker       bool is_partial_spill = false;
527*61046927SAndroid Build Coastguard Worker       float score = 0.0;
528*61046927SAndroid Build Coastguard Worker       RegType type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr;
529*61046927SAndroid Build Coastguard Worker 
530*61046927SAndroid Build Coastguard Worker       while (it != partial_spills.end()) {
531*61046927SAndroid Build Coastguard Worker          assert(!ctx.spills_entry[block_idx].count(it->first));
532*61046927SAndroid Build Coastguard Worker 
533*61046927SAndroid Build Coastguard Worker          if (it->first.type() == type && !it->first.regClass().is_linear_vgpr() &&
534*61046927SAndroid Build Coastguard Worker              ((it->second && !is_partial_spill) ||
535*61046927SAndroid Build Coastguard Worker               (it->second == is_partial_spill && ctx.ssa_infos[it->first.id()].score() > score))) {
536*61046927SAndroid Build Coastguard Worker             score = ctx.ssa_infos[it->first.id()].score();
537*61046927SAndroid Build Coastguard Worker             to_spill = it->first;
538*61046927SAndroid Build Coastguard Worker             is_partial_spill = it->second;
539*61046927SAndroid Build Coastguard Worker          }
540*61046927SAndroid Build Coastguard Worker          ++it;
541*61046927SAndroid Build Coastguard Worker       }
542*61046927SAndroid Build Coastguard Worker       assert(score != 0.0);
543*61046927SAndroid Build Coastguard Worker       ctx.add_to_spills(to_spill, ctx.spills_entry[block_idx]);
544*61046927SAndroid Build Coastguard Worker       partial_spills.erase(to_spill);
545*61046927SAndroid Build Coastguard Worker       spilled_registers += to_spill;
546*61046927SAndroid Build Coastguard Worker       reg_pressure -= to_spill;
547*61046927SAndroid Build Coastguard Worker    }
548*61046927SAndroid Build Coastguard Worker 
549*61046927SAndroid Build Coastguard Worker    return spilled_registers;
550*61046927SAndroid Build Coastguard Worker }
551*61046927SAndroid Build Coastguard Worker 
552*61046927SAndroid Build Coastguard Worker void
add_coupling_code(spill_ctx & ctx,Block * block,IDSet & live_in)553*61046927SAndroid Build Coastguard Worker add_coupling_code(spill_ctx& ctx, Block* block, IDSet& live_in)
554*61046927SAndroid Build Coastguard Worker {
555*61046927SAndroid Build Coastguard Worker    const unsigned block_idx = block->index;
556*61046927SAndroid Build Coastguard Worker    /* No coupling code necessary */
557*61046927SAndroid Build Coastguard Worker    if (block->linear_preds.size() == 0)
558*61046927SAndroid Build Coastguard Worker       return;
559*61046927SAndroid Build Coastguard Worker 
560*61046927SAndroid Build Coastguard Worker    /* Branch block: update renames */
561*61046927SAndroid Build Coastguard Worker    if (block->linear_preds.size() == 1 &&
562*61046927SAndroid Build Coastguard Worker        !(block->kind & (block_kind_loop_exit | block_kind_loop_header))) {
563*61046927SAndroid Build Coastguard Worker       assert(ctx.processed[block->linear_preds[0]]);
564*61046927SAndroid Build Coastguard Worker 
565*61046927SAndroid Build Coastguard Worker       ctx.renames[block_idx] = ctx.renames[block->linear_preds[0]];
566*61046927SAndroid Build Coastguard Worker       if (!block->logical_preds.empty() && block->logical_preds[0] != block->linear_preds[0]) {
567*61046927SAndroid Build Coastguard Worker          for (auto it : ctx.renames[block->logical_preds[0]]) {
568*61046927SAndroid Build Coastguard Worker             if (it.first.type() == RegType::vgpr)
569*61046927SAndroid Build Coastguard Worker                ctx.renames[block_idx].insert_or_assign(it.first, it.second);
570*61046927SAndroid Build Coastguard Worker          }
571*61046927SAndroid Build Coastguard Worker       }
572*61046927SAndroid Build Coastguard Worker       return;
573*61046927SAndroid Build Coastguard Worker    }
574*61046927SAndroid Build Coastguard Worker 
575*61046927SAndroid Build Coastguard Worker    std::vector<aco_ptr<Instruction>> instructions;
576*61046927SAndroid Build Coastguard Worker 
577*61046927SAndroid Build Coastguard Worker    /* loop header and merge blocks: check if all (linear) predecessors have been processed */
578*61046927SAndroid Build Coastguard Worker    for (ASSERTED unsigned pred : block->linear_preds)
579*61046927SAndroid Build Coastguard Worker       assert(ctx.processed[pred]);
580*61046927SAndroid Build Coastguard Worker 
581*61046927SAndroid Build Coastguard Worker    /* iterate the phi nodes for which operands to spill at the predecessor */
582*61046927SAndroid Build Coastguard Worker    for (aco_ptr<Instruction>& phi : block->instructions) {
583*61046927SAndroid Build Coastguard Worker       if (!is_phi(phi))
584*61046927SAndroid Build Coastguard Worker          break;
585*61046927SAndroid Build Coastguard Worker 
586*61046927SAndroid Build Coastguard Worker       for (const Operand& op : phi->operands) {
587*61046927SAndroid Build Coastguard Worker          if (op.isTemp())
588*61046927SAndroid Build Coastguard Worker             ctx.ssa_infos[op.tempId()].num_uses--;
589*61046927SAndroid Build Coastguard Worker       }
590*61046927SAndroid Build Coastguard Worker 
591*61046927SAndroid Build Coastguard Worker       /* The phi is not spilled */
592*61046927SAndroid Build Coastguard Worker       if (!phi->definitions[0].isTemp() ||
593*61046927SAndroid Build Coastguard Worker           !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()))
594*61046927SAndroid Build Coastguard Worker          continue;
595*61046927SAndroid Build Coastguard Worker 
596*61046927SAndroid Build Coastguard Worker       Block::edge_vec& preds =
597*61046927SAndroid Build Coastguard Worker          phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
598*61046927SAndroid Build Coastguard Worker       uint32_t def_spill_id = ctx.spills_entry[block_idx][phi->definitions[0].getTemp()];
599*61046927SAndroid Build Coastguard Worker       phi->definitions[0].setKill(true);
600*61046927SAndroid Build Coastguard Worker 
601*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < phi->operands.size(); i++) {
602*61046927SAndroid Build Coastguard Worker          if (phi->operands[i].isUndefined())
603*61046927SAndroid Build Coastguard Worker             continue;
604*61046927SAndroid Build Coastguard Worker 
605*61046927SAndroid Build Coastguard Worker          unsigned pred_idx = preds[i];
606*61046927SAndroid Build Coastguard Worker          Operand spill_op = phi->operands[i];
607*61046927SAndroid Build Coastguard Worker          phi->operands[i] = Operand(phi->definitions[0].regClass());
608*61046927SAndroid Build Coastguard Worker 
609*61046927SAndroid Build Coastguard Worker          if (spill_op.isTemp()) {
610*61046927SAndroid Build Coastguard Worker             assert(spill_op.isKill());
611*61046927SAndroid Build Coastguard Worker             Temp var = spill_op.getTemp();
612*61046927SAndroid Build Coastguard Worker 
613*61046927SAndroid Build Coastguard Worker             std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
614*61046927SAndroid Build Coastguard Worker             /* prevent the defining instruction from being DCE'd if it could be rematerialized */
615*61046927SAndroid Build Coastguard Worker             if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var))
616*61046927SAndroid Build Coastguard Worker                ctx.unused_remats.erase(ctx.remat[var].instr);
617*61046927SAndroid Build Coastguard Worker 
618*61046927SAndroid Build Coastguard Worker             /* check if variable is already spilled at predecessor */
619*61046927SAndroid Build Coastguard Worker             auto spilled = ctx.spills_exit[pred_idx].find(var);
620*61046927SAndroid Build Coastguard Worker             if (spilled != ctx.spills_exit[pred_idx].end()) {
621*61046927SAndroid Build Coastguard Worker                if (spilled->second != def_spill_id)
622*61046927SAndroid Build Coastguard Worker                   ctx.add_affinity(def_spill_id, spilled->second);
623*61046927SAndroid Build Coastguard Worker                continue;
624*61046927SAndroid Build Coastguard Worker             }
625*61046927SAndroid Build Coastguard Worker 
626*61046927SAndroid Build Coastguard Worker             /* If the phi operand has the same name as the definition,
627*61046927SAndroid Build Coastguard Worker              * add to predecessor's spilled variables, so that it gets
628*61046927SAndroid Build Coastguard Worker              * skipped in the loop below.
629*61046927SAndroid Build Coastguard Worker              */
630*61046927SAndroid Build Coastguard Worker             if (var == phi->definitions[0].getTemp())
631*61046927SAndroid Build Coastguard Worker                ctx.spills_exit[pred_idx][var] = def_spill_id;
632*61046927SAndroid Build Coastguard Worker 
633*61046927SAndroid Build Coastguard Worker             /* rename if necessary */
634*61046927SAndroid Build Coastguard Worker             if (rename_it != ctx.renames[pred_idx].end()) {
635*61046927SAndroid Build Coastguard Worker                spill_op.setTemp(rename_it->second);
636*61046927SAndroid Build Coastguard Worker                ctx.renames[pred_idx].erase(rename_it);
637*61046927SAndroid Build Coastguard Worker             }
638*61046927SAndroid Build Coastguard Worker          }
639*61046927SAndroid Build Coastguard Worker 
640*61046927SAndroid Build Coastguard Worker          /* add interferences */
641*61046927SAndroid Build Coastguard Worker          for (std::pair<Temp, uint32_t> pair : ctx.spills_exit[pred_idx])
642*61046927SAndroid Build Coastguard Worker             ctx.add_interference(def_spill_id, pair.second);
643*61046927SAndroid Build Coastguard Worker 
644*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> spill{create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
645*61046927SAndroid Build Coastguard Worker          spill->operands[0] = spill_op;
646*61046927SAndroid Build Coastguard Worker          spill->operands[1] = Operand::c32(def_spill_id);
647*61046927SAndroid Build Coastguard Worker          Block& pred = ctx.program->blocks[pred_idx];
648*61046927SAndroid Build Coastguard Worker          unsigned idx = pred.instructions.size();
649*61046927SAndroid Build Coastguard Worker          do {
650*61046927SAndroid Build Coastguard Worker             assert(idx != 0);
651*61046927SAndroid Build Coastguard Worker             idx--;
652*61046927SAndroid Build Coastguard Worker          } while (phi->opcode == aco_opcode::p_phi &&
653*61046927SAndroid Build Coastguard Worker                   pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
654*61046927SAndroid Build Coastguard Worker          std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
655*61046927SAndroid Build Coastguard Worker          pred.instructions.insert(it, std::move(spill));
656*61046927SAndroid Build Coastguard Worker       }
657*61046927SAndroid Build Coastguard Worker    }
658*61046927SAndroid Build Coastguard Worker 
659*61046927SAndroid Build Coastguard Worker    /* iterate all (other) spilled variables for which to spill at the predecessor */
660*61046927SAndroid Build Coastguard Worker    // TODO: would be better to have them sorted: first vgprs and first with longest distance
661*61046927SAndroid Build Coastguard Worker    for (std::pair<Temp, uint32_t> pair : ctx.spills_entry[block_idx]) {
662*61046927SAndroid Build Coastguard Worker       /* if variable is not live-in, it must be from a phi: this works because of CSSA form */
663*61046927SAndroid Build Coastguard Worker       if (!live_in.count(pair.first.id()))
664*61046927SAndroid Build Coastguard Worker          continue;
665*61046927SAndroid Build Coastguard Worker 
666*61046927SAndroid Build Coastguard Worker       Block::edge_vec& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds;
667*61046927SAndroid Build Coastguard Worker       for (unsigned pred_idx : preds) {
668*61046927SAndroid Build Coastguard Worker          /* variable is already spilled at predecessor */
669*61046927SAndroid Build Coastguard Worker          auto spilled = ctx.spills_exit[pred_idx].find(pair.first);
670*61046927SAndroid Build Coastguard Worker          if (spilled != ctx.spills_exit[pred_idx].end()) {
671*61046927SAndroid Build Coastguard Worker             if (spilled->second != pair.second)
672*61046927SAndroid Build Coastguard Worker                ctx.add_affinity(pair.second, spilled->second);
673*61046927SAndroid Build Coastguard Worker             continue;
674*61046927SAndroid Build Coastguard Worker          }
675*61046927SAndroid Build Coastguard Worker 
676*61046927SAndroid Build Coastguard Worker          /* If this variable is spilled through the entire loop, no need to re-spill.
677*61046927SAndroid Build Coastguard Worker           * It can be reloaded from the same spill-slot it got at the loop-preheader.
678*61046927SAndroid Build Coastguard Worker           * No need to add interferences since every spilled variable in the loop already
679*61046927SAndroid Build Coastguard Worker           * interferes with the spilled loop-variables. Make sure that the spill_ids match.
680*61046927SAndroid Build Coastguard Worker           */
681*61046927SAndroid Build Coastguard Worker          const uint32_t loop_nest_depth = std::min(ctx.program->blocks[pred_idx].loop_nest_depth,
682*61046927SAndroid Build Coastguard Worker                                                    ctx.program->blocks[block_idx].loop_nest_depth);
683*61046927SAndroid Build Coastguard Worker          if (loop_nest_depth) {
684*61046927SAndroid Build Coastguard Worker             auto spill = ctx.loop[loop_nest_depth - 1].spills.find(pair.first);
685*61046927SAndroid Build Coastguard Worker             if (spill != ctx.loop[loop_nest_depth - 1].spills.end() && spill->second == pair.second)
686*61046927SAndroid Build Coastguard Worker                continue;
687*61046927SAndroid Build Coastguard Worker          }
688*61046927SAndroid Build Coastguard Worker 
689*61046927SAndroid Build Coastguard Worker          /* add interferences between spilled variable and predecessors exit spills */
690*61046927SAndroid Build Coastguard Worker          for (std::pair<Temp, uint32_t> exit_spill : ctx.spills_exit[pred_idx])
691*61046927SAndroid Build Coastguard Worker             ctx.add_interference(exit_spill.second, pair.second);
692*61046927SAndroid Build Coastguard Worker 
693*61046927SAndroid Build Coastguard Worker          /* variable is in register at predecessor and has to be spilled */
694*61046927SAndroid Build Coastguard Worker          /* rename if necessary */
695*61046927SAndroid Build Coastguard Worker          Temp var = pair.first;
696*61046927SAndroid Build Coastguard Worker          std::map<Temp, Temp>::iterator rename_it = ctx.renames[pred_idx].find(var);
697*61046927SAndroid Build Coastguard Worker          if (rename_it != ctx.renames[pred_idx].end()) {
698*61046927SAndroid Build Coastguard Worker             var = rename_it->second;
699*61046927SAndroid Build Coastguard Worker             ctx.renames[pred_idx].erase(rename_it);
700*61046927SAndroid Build Coastguard Worker          }
701*61046927SAndroid Build Coastguard Worker 
702*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> spill{create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
703*61046927SAndroid Build Coastguard Worker          spill->operands[0] = Operand(var);
704*61046927SAndroid Build Coastguard Worker          spill->operands[1] = Operand::c32(pair.second);
705*61046927SAndroid Build Coastguard Worker          Block& pred = ctx.program->blocks[pred_idx];
706*61046927SAndroid Build Coastguard Worker          unsigned idx = pred.instructions.size();
707*61046927SAndroid Build Coastguard Worker          do {
708*61046927SAndroid Build Coastguard Worker             assert(idx != 0);
709*61046927SAndroid Build Coastguard Worker             idx--;
710*61046927SAndroid Build Coastguard Worker          } while (pair.first.type() == RegType::vgpr &&
711*61046927SAndroid Build Coastguard Worker                   pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
712*61046927SAndroid Build Coastguard Worker          std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
713*61046927SAndroid Build Coastguard Worker          pred.instructions.insert(it, std::move(spill));
714*61046927SAndroid Build Coastguard Worker       }
715*61046927SAndroid Build Coastguard Worker    }
716*61046927SAndroid Build Coastguard Worker 
717*61046927SAndroid Build Coastguard Worker    /* iterate phis for which operands to reload */
718*61046927SAndroid Build Coastguard Worker    for (aco_ptr<Instruction>& phi : block->instructions) {
719*61046927SAndroid Build Coastguard Worker       if (!is_phi(phi))
720*61046927SAndroid Build Coastguard Worker          break;
721*61046927SAndroid Build Coastguard Worker       if (phi->definitions[0].isKill())
722*61046927SAndroid Build Coastguard Worker          continue;
723*61046927SAndroid Build Coastguard Worker 
724*61046927SAndroid Build Coastguard Worker       assert(!phi->definitions[0].isTemp() ||
725*61046927SAndroid Build Coastguard Worker              !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp()));
726*61046927SAndroid Build Coastguard Worker 
727*61046927SAndroid Build Coastguard Worker       Block::edge_vec& preds =
728*61046927SAndroid Build Coastguard Worker          phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds;
729*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < phi->operands.size(); i++) {
730*61046927SAndroid Build Coastguard Worker          if (!phi->operands[i].isTemp())
731*61046927SAndroid Build Coastguard Worker             continue;
732*61046927SAndroid Build Coastguard Worker          unsigned pred_idx = preds[i];
733*61046927SAndroid Build Coastguard Worker 
734*61046927SAndroid Build Coastguard Worker          /* if the operand was reloaded, rename */
735*61046927SAndroid Build Coastguard Worker          if (!ctx.spills_exit[pred_idx].count(phi->operands[i].getTemp())) {
736*61046927SAndroid Build Coastguard Worker             std::map<Temp, Temp>::iterator it =
737*61046927SAndroid Build Coastguard Worker                ctx.renames[pred_idx].find(phi->operands[i].getTemp());
738*61046927SAndroid Build Coastguard Worker             if (it != ctx.renames[pred_idx].end()) {
739*61046927SAndroid Build Coastguard Worker                phi->operands[i].setTemp(it->second);
740*61046927SAndroid Build Coastguard Worker                /* prevent the defining instruction from being DCE'd if it could be rematerialized */
741*61046927SAndroid Build Coastguard Worker             } else {
742*61046927SAndroid Build Coastguard Worker                auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
743*61046927SAndroid Build Coastguard Worker                if (remat_it != ctx.remat.end()) {
744*61046927SAndroid Build Coastguard Worker                   ctx.unused_remats.erase(remat_it->second.instr);
745*61046927SAndroid Build Coastguard Worker                }
746*61046927SAndroid Build Coastguard Worker             }
747*61046927SAndroid Build Coastguard Worker             continue;
748*61046927SAndroid Build Coastguard Worker          }
749*61046927SAndroid Build Coastguard Worker 
750*61046927SAndroid Build Coastguard Worker          Temp tmp = phi->operands[i].getTemp();
751*61046927SAndroid Build Coastguard Worker 
752*61046927SAndroid Build Coastguard Worker          /* reload phi operand at end of predecessor block */
753*61046927SAndroid Build Coastguard Worker          Temp new_name = ctx.program->allocateTmp(tmp.regClass());
754*61046927SAndroid Build Coastguard Worker          Block& pred = ctx.program->blocks[pred_idx];
755*61046927SAndroid Build Coastguard Worker          unsigned idx = pred.instructions.size();
756*61046927SAndroid Build Coastguard Worker          do {
757*61046927SAndroid Build Coastguard Worker             assert(idx != 0);
758*61046927SAndroid Build Coastguard Worker             idx--;
759*61046927SAndroid Build Coastguard Worker          } while (phi->opcode == aco_opcode::p_phi &&
760*61046927SAndroid Build Coastguard Worker                   pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
761*61046927SAndroid Build Coastguard Worker          std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
762*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> reload =
763*61046927SAndroid Build Coastguard Worker             do_reload(ctx, tmp, new_name, ctx.spills_exit[pred_idx][tmp]);
764*61046927SAndroid Build Coastguard Worker 
765*61046927SAndroid Build Coastguard Worker          /* reload spilled exec mask directly to exec */
766*61046927SAndroid Build Coastguard Worker          if (!phi->definitions[0].isTemp()) {
767*61046927SAndroid Build Coastguard Worker             assert(phi->definitions[0].isFixed() && phi->definitions[0].physReg() == exec);
768*61046927SAndroid Build Coastguard Worker             reload->definitions[0] = phi->definitions[0];
769*61046927SAndroid Build Coastguard Worker             phi->operands[i] = Operand(exec, ctx.program->lane_mask);
770*61046927SAndroid Build Coastguard Worker          } else {
771*61046927SAndroid Build Coastguard Worker             ctx.spills_exit[pred_idx].erase(tmp);
772*61046927SAndroid Build Coastguard Worker             ctx.renames[pred_idx][tmp] = new_name;
773*61046927SAndroid Build Coastguard Worker             phi->operands[i].setTemp(new_name);
774*61046927SAndroid Build Coastguard Worker          }
775*61046927SAndroid Build Coastguard Worker 
776*61046927SAndroid Build Coastguard Worker          pred.instructions.insert(it, std::move(reload));
777*61046927SAndroid Build Coastguard Worker       }
778*61046927SAndroid Build Coastguard Worker    }
779*61046927SAndroid Build Coastguard Worker 
780*61046927SAndroid Build Coastguard Worker    /* iterate live variables for which to reload */
781*61046927SAndroid Build Coastguard Worker    for (unsigned t : live_in) {
782*61046927SAndroid Build Coastguard Worker       const RegClass rc = ctx.program->temp_rc[t];
783*61046927SAndroid Build Coastguard Worker       Temp var = Temp(t, rc);
784*61046927SAndroid Build Coastguard Worker 
785*61046927SAndroid Build Coastguard Worker       /* skip spilled variables */
786*61046927SAndroid Build Coastguard Worker       if (ctx.spills_entry[block_idx].count(var))
787*61046927SAndroid Build Coastguard Worker          continue;
788*61046927SAndroid Build Coastguard Worker 
789*61046927SAndroid Build Coastguard Worker       Block::edge_vec& preds = rc.is_linear() ? block->linear_preds : block->logical_preds;
790*61046927SAndroid Build Coastguard Worker       for (unsigned pred_idx : preds) {
791*61046927SAndroid Build Coastguard Worker          /* skip if the variable is not spilled at the predecessor */
792*61046927SAndroid Build Coastguard Worker          if (!ctx.spills_exit[pred_idx].count(var))
793*61046927SAndroid Build Coastguard Worker             continue;
794*61046927SAndroid Build Coastguard Worker 
795*61046927SAndroid Build Coastguard Worker          /* variable is spilled at predecessor and has to be reloaded */
796*61046927SAndroid Build Coastguard Worker          Temp new_name = ctx.program->allocateTmp(rc);
797*61046927SAndroid Build Coastguard Worker          Block& pred = ctx.program->blocks[pred_idx];
798*61046927SAndroid Build Coastguard Worker          unsigned idx = pred.instructions.size();
799*61046927SAndroid Build Coastguard Worker          do {
800*61046927SAndroid Build Coastguard Worker             assert(idx != 0);
801*61046927SAndroid Build Coastguard Worker             idx--;
802*61046927SAndroid Build Coastguard Worker          } while (rc.type() == RegType::vgpr &&
803*61046927SAndroid Build Coastguard Worker                   pred.instructions[idx]->opcode != aco_opcode::p_logical_end);
804*61046927SAndroid Build Coastguard Worker          std::vector<aco_ptr<Instruction>>::iterator it = std::next(pred.instructions.begin(), idx);
805*61046927SAndroid Build Coastguard Worker 
806*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> reload =
807*61046927SAndroid Build Coastguard Worker             do_reload(ctx, var, new_name, ctx.spills_exit[pred.index][var]);
808*61046927SAndroid Build Coastguard Worker          pred.instructions.insert(it, std::move(reload));
809*61046927SAndroid Build Coastguard Worker 
810*61046927SAndroid Build Coastguard Worker          ctx.spills_exit[pred.index].erase(var);
811*61046927SAndroid Build Coastguard Worker          ctx.renames[pred.index][var] = new_name;
812*61046927SAndroid Build Coastguard Worker       }
813*61046927SAndroid Build Coastguard Worker 
814*61046927SAndroid Build Coastguard Worker       /* check if we have to create a new phi for this variable */
815*61046927SAndroid Build Coastguard Worker       Temp rename = Temp();
816*61046927SAndroid Build Coastguard Worker       bool is_same = true;
817*61046927SAndroid Build Coastguard Worker       for (unsigned pred_idx : preds) {
818*61046927SAndroid Build Coastguard Worker          if (!ctx.renames[pred_idx].count(var)) {
819*61046927SAndroid Build Coastguard Worker             if (rename == Temp())
820*61046927SAndroid Build Coastguard Worker                rename = var;
821*61046927SAndroid Build Coastguard Worker             else
822*61046927SAndroid Build Coastguard Worker                is_same = rename == var;
823*61046927SAndroid Build Coastguard Worker          } else {
824*61046927SAndroid Build Coastguard Worker             if (rename == Temp())
825*61046927SAndroid Build Coastguard Worker                rename = ctx.renames[pred_idx][var];
826*61046927SAndroid Build Coastguard Worker             else
827*61046927SAndroid Build Coastguard Worker                is_same = rename == ctx.renames[pred_idx][var];
828*61046927SAndroid Build Coastguard Worker          }
829*61046927SAndroid Build Coastguard Worker 
830*61046927SAndroid Build Coastguard Worker          if (!is_same)
831*61046927SAndroid Build Coastguard Worker             break;
832*61046927SAndroid Build Coastguard Worker       }
833*61046927SAndroid Build Coastguard Worker 
834*61046927SAndroid Build Coastguard Worker       if (!is_same) {
835*61046927SAndroid Build Coastguard Worker          /* the variable was renamed differently in the predecessors: we have to create a phi */
836*61046927SAndroid Build Coastguard Worker          aco_opcode opcode = rc.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
837*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> phi{create_instruction(opcode, Format::PSEUDO, preds.size(), 1)};
838*61046927SAndroid Build Coastguard Worker          rename = ctx.program->allocateTmp(rc);
839*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < phi->operands.size(); i++) {
840*61046927SAndroid Build Coastguard Worker             Temp tmp;
841*61046927SAndroid Build Coastguard Worker             if (ctx.renames[preds[i]].count(var)) {
842*61046927SAndroid Build Coastguard Worker                tmp = ctx.renames[preds[i]][var];
843*61046927SAndroid Build Coastguard Worker             } else if (preds[i] >= block_idx) {
844*61046927SAndroid Build Coastguard Worker                tmp = rename;
845*61046927SAndroid Build Coastguard Worker             } else {
846*61046927SAndroid Build Coastguard Worker                tmp = var;
847*61046927SAndroid Build Coastguard Worker                /* prevent the defining instruction from being DCE'd if it could be rematerialized */
848*61046927SAndroid Build Coastguard Worker                if (ctx.remat.count(tmp))
849*61046927SAndroid Build Coastguard Worker                   ctx.unused_remats.erase(ctx.remat[tmp].instr);
850*61046927SAndroid Build Coastguard Worker             }
851*61046927SAndroid Build Coastguard Worker             phi->operands[i] = Operand(tmp);
852*61046927SAndroid Build Coastguard Worker          }
853*61046927SAndroid Build Coastguard Worker          phi->definitions[0] = Definition(rename);
854*61046927SAndroid Build Coastguard Worker          phi->register_demand = block->live_in_demand;
855*61046927SAndroid Build Coastguard Worker          block->instructions.insert(block->instructions.begin(), std::move(phi));
856*61046927SAndroid Build Coastguard Worker       }
857*61046927SAndroid Build Coastguard Worker 
858*61046927SAndroid Build Coastguard Worker       /* the variable was renamed: add new name to renames */
859*61046927SAndroid Build Coastguard Worker       if (!(rename == Temp() || rename == var))
860*61046927SAndroid Build Coastguard Worker          ctx.renames[block_idx][var] = rename;
861*61046927SAndroid Build Coastguard Worker    }
862*61046927SAndroid Build Coastguard Worker }
863*61046927SAndroid Build Coastguard Worker 
864*61046927SAndroid Build Coastguard Worker void
process_block(spill_ctx & ctx,unsigned block_idx,Block * block,RegisterDemand spilled_registers)865*61046927SAndroid Build Coastguard Worker process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand spilled_registers)
866*61046927SAndroid Build Coastguard Worker {
867*61046927SAndroid Build Coastguard Worker    assert(!ctx.processed[block_idx]);
868*61046927SAndroid Build Coastguard Worker 
869*61046927SAndroid Build Coastguard Worker    std::vector<aco_ptr<Instruction>> instructions;
870*61046927SAndroid Build Coastguard Worker    unsigned idx = 0;
871*61046927SAndroid Build Coastguard Worker 
872*61046927SAndroid Build Coastguard Worker    /* phis are handled separately */
873*61046927SAndroid Build Coastguard Worker    while (block->instructions[idx]->opcode == aco_opcode::p_phi ||
874*61046927SAndroid Build Coastguard Worker           block->instructions[idx]->opcode == aco_opcode::p_linear_phi) {
875*61046927SAndroid Build Coastguard Worker       const Definition def = block->instructions[idx]->definitions[0];
876*61046927SAndroid Build Coastguard Worker       if (def.isTemp() && !def.isKill() && def.tempId() < ctx.ssa_infos.size())
877*61046927SAndroid Build Coastguard Worker          ctx.program->live.live_in[block_idx].insert(def.tempId());
878*61046927SAndroid Build Coastguard Worker       instructions.emplace_back(std::move(block->instructions[idx++]));
879*61046927SAndroid Build Coastguard Worker    }
880*61046927SAndroid Build Coastguard Worker 
881*61046927SAndroid Build Coastguard Worker    auto& current_spills = ctx.spills_exit[block_idx];
882*61046927SAndroid Build Coastguard Worker 
883*61046927SAndroid Build Coastguard Worker    while (idx < block->instructions.size()) {
884*61046927SAndroid Build Coastguard Worker       aco_ptr<Instruction>& instr = block->instructions[idx];
885*61046927SAndroid Build Coastguard Worker 
886*61046927SAndroid Build Coastguard Worker       /* Spilling is handled as part of phis (they should always have the same or higher register
887*61046927SAndroid Build Coastguard Worker        * demand). If we try to spill here, we might not be able to reduce the register demand enough
888*61046927SAndroid Build Coastguard Worker        * because there is no path to spill constant/undef phi operands. */
889*61046927SAndroid Build Coastguard Worker       if (instr->opcode == aco_opcode::p_branch) {
890*61046927SAndroid Build Coastguard Worker          instructions.emplace_back(std::move(instr));
891*61046927SAndroid Build Coastguard Worker          idx++;
892*61046927SAndroid Build Coastguard Worker          continue;
893*61046927SAndroid Build Coastguard Worker       }
894*61046927SAndroid Build Coastguard Worker 
895*61046927SAndroid Build Coastguard Worker       std::map<Temp, std::pair<Temp, uint32_t>> reloads;
896*61046927SAndroid Build Coastguard Worker 
897*61046927SAndroid Build Coastguard Worker       /* rename and reload operands */
898*61046927SAndroid Build Coastguard Worker       for (Operand& op : instr->operands) {
899*61046927SAndroid Build Coastguard Worker          if (!op.isTemp())
900*61046927SAndroid Build Coastguard Worker             continue;
901*61046927SAndroid Build Coastguard Worker 
902*61046927SAndroid Build Coastguard Worker          if (op.isFirstKill())
903*61046927SAndroid Build Coastguard Worker             ctx.program->live.live_in[block_idx].erase(op.tempId());
904*61046927SAndroid Build Coastguard Worker          ctx.ssa_infos[op.tempId()].num_uses--;
905*61046927SAndroid Build Coastguard Worker 
906*61046927SAndroid Build Coastguard Worker          if (!current_spills.count(op.getTemp()))
907*61046927SAndroid Build Coastguard Worker             continue;
908*61046927SAndroid Build Coastguard Worker 
909*61046927SAndroid Build Coastguard Worker          /* the Operand is spilled: add it to reloads */
910*61046927SAndroid Build Coastguard Worker          Temp new_tmp = ctx.program->allocateTmp(op.regClass());
911*61046927SAndroid Build Coastguard Worker          ctx.renames[block_idx][op.getTemp()] = new_tmp;
912*61046927SAndroid Build Coastguard Worker          reloads[new_tmp] = std::make_pair(op.getTemp(), current_spills[op.getTemp()]);
913*61046927SAndroid Build Coastguard Worker          current_spills.erase(op.getTemp());
914*61046927SAndroid Build Coastguard Worker          spilled_registers -= new_tmp;
915*61046927SAndroid Build Coastguard Worker       }
916*61046927SAndroid Build Coastguard Worker 
917*61046927SAndroid Build Coastguard Worker       /* check if register demand is low enough during and after the current instruction */
918*61046927SAndroid Build Coastguard Worker       if (block->register_demand.exceeds(ctx.target_pressure)) {
919*61046927SAndroid Build Coastguard Worker          RegisterDemand new_demand = instr->register_demand;
920*61046927SAndroid Build Coastguard Worker 
921*61046927SAndroid Build Coastguard Worker          /* if reg pressure is too high, spill variable with furthest next use */
922*61046927SAndroid Build Coastguard Worker          while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {
923*61046927SAndroid Build Coastguard Worker             float score = 0.0;
924*61046927SAndroid Build Coastguard Worker             Temp to_spill;
925*61046927SAndroid Build Coastguard Worker             unsigned do_rematerialize = 0;
926*61046927SAndroid Build Coastguard Worker             unsigned avoid_respill = 0;
927*61046927SAndroid Build Coastguard Worker             RegType type = RegType::sgpr;
928*61046927SAndroid Build Coastguard Worker             if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr)
929*61046927SAndroid Build Coastguard Worker                type = RegType::vgpr;
930*61046927SAndroid Build Coastguard Worker 
931*61046927SAndroid Build Coastguard Worker             for (unsigned t : ctx.program->live.live_in[block_idx]) {
932*61046927SAndroid Build Coastguard Worker                RegClass rc = ctx.program->temp_rc[t];
933*61046927SAndroid Build Coastguard Worker                Temp var = Temp(t, rc);
934*61046927SAndroid Build Coastguard Worker                if (rc.type() != type || current_spills.count(var) || rc.is_linear_vgpr())
935*61046927SAndroid Build Coastguard Worker                   continue;
936*61046927SAndroid Build Coastguard Worker 
937*61046927SAndroid Build Coastguard Worker                unsigned can_rematerialize = ctx.remat.count(var);
938*61046927SAndroid Build Coastguard Worker                unsigned loop_variable = block->loop_nest_depth && ctx.loop.back().spills.count(var);
939*61046927SAndroid Build Coastguard Worker                if (avoid_respill > loop_variable || do_rematerialize > can_rematerialize)
940*61046927SAndroid Build Coastguard Worker                   continue;
941*61046927SAndroid Build Coastguard Worker 
942*61046927SAndroid Build Coastguard Worker                if (can_rematerialize > do_rematerialize || loop_variable > avoid_respill ||
943*61046927SAndroid Build Coastguard Worker                    ctx.ssa_infos[t].score() > score) {
944*61046927SAndroid Build Coastguard Worker                   /* Don't spill operands */
945*61046927SAndroid Build Coastguard Worker                   if (std::any_of(instr->operands.begin(), instr->operands.end(),
946*61046927SAndroid Build Coastguard Worker                                   [&](Operand& op) { return op.isTemp() && op.getTemp() == var; }))
947*61046927SAndroid Build Coastguard Worker                      continue;
948*61046927SAndroid Build Coastguard Worker 
949*61046927SAndroid Build Coastguard Worker                   to_spill = var;
950*61046927SAndroid Build Coastguard Worker                   score = ctx.ssa_infos[t].score();
951*61046927SAndroid Build Coastguard Worker                   do_rematerialize = can_rematerialize;
952*61046927SAndroid Build Coastguard Worker                   avoid_respill = loop_variable;
953*61046927SAndroid Build Coastguard Worker                }
954*61046927SAndroid Build Coastguard Worker             }
955*61046927SAndroid Build Coastguard Worker             assert(score != 0.0);
956*61046927SAndroid Build Coastguard Worker 
957*61046927SAndroid Build Coastguard Worker             if (avoid_respill) {
958*61046927SAndroid Build Coastguard Worker                /* This variable is spilled at the loop-header of the current loop.
959*61046927SAndroid Build Coastguard Worker                 * Re-use the spill-slot in order to avoid an extra store.
960*61046927SAndroid Build Coastguard Worker                 */
961*61046927SAndroid Build Coastguard Worker                current_spills[to_spill] = ctx.loop.back().spills[to_spill];
962*61046927SAndroid Build Coastguard Worker                spilled_registers += to_spill;
963*61046927SAndroid Build Coastguard Worker                continue;
964*61046927SAndroid Build Coastguard Worker             }
965*61046927SAndroid Build Coastguard Worker 
966*61046927SAndroid Build Coastguard Worker             uint32_t spill_id = ctx.add_to_spills(to_spill, current_spills);
967*61046927SAndroid Build Coastguard Worker             /* add interferences with reloads */
968*61046927SAndroid Build Coastguard Worker             for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads)
969*61046927SAndroid Build Coastguard Worker                ctx.add_interference(spill_id, pair.second.second);
970*61046927SAndroid Build Coastguard Worker 
971*61046927SAndroid Build Coastguard Worker             spilled_registers += to_spill;
972*61046927SAndroid Build Coastguard Worker 
973*61046927SAndroid Build Coastguard Worker             /* rename if necessary */
974*61046927SAndroid Build Coastguard Worker             if (ctx.renames[block_idx].count(to_spill)) {
975*61046927SAndroid Build Coastguard Worker                to_spill = ctx.renames[block_idx][to_spill];
976*61046927SAndroid Build Coastguard Worker             }
977*61046927SAndroid Build Coastguard Worker 
978*61046927SAndroid Build Coastguard Worker             /* add spill to new instructions */
979*61046927SAndroid Build Coastguard Worker             aco_ptr<Instruction> spill{
980*61046927SAndroid Build Coastguard Worker                create_instruction(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
981*61046927SAndroid Build Coastguard Worker             spill->operands[0] = Operand(to_spill);
982*61046927SAndroid Build Coastguard Worker             spill->operands[1] = Operand::c32(spill_id);
983*61046927SAndroid Build Coastguard Worker             instructions.emplace_back(std::move(spill));
984*61046927SAndroid Build Coastguard Worker          }
985*61046927SAndroid Build Coastguard Worker       }
986*61046927SAndroid Build Coastguard Worker 
987*61046927SAndroid Build Coastguard Worker       for (const Definition& def : instr->definitions) {
988*61046927SAndroid Build Coastguard Worker          if (def.isTemp() && !def.isKill())
989*61046927SAndroid Build Coastguard Worker             ctx.program->live.live_in[block_idx].insert(def.tempId());
990*61046927SAndroid Build Coastguard Worker       }
991*61046927SAndroid Build Coastguard Worker       /* rename operands */
992*61046927SAndroid Build Coastguard Worker       for (Operand& op : instr->operands) {
993*61046927SAndroid Build Coastguard Worker          if (op.isTemp()) {
994*61046927SAndroid Build Coastguard Worker             auto rename_it = ctx.renames[block_idx].find(op.getTemp());
995*61046927SAndroid Build Coastguard Worker             if (rename_it != ctx.renames[block_idx].end()) {
996*61046927SAndroid Build Coastguard Worker                op.setTemp(rename_it->second);
997*61046927SAndroid Build Coastguard Worker             } else {
998*61046927SAndroid Build Coastguard Worker                /* prevent its defining instruction from being DCE'd if it could be rematerialized */
999*61046927SAndroid Build Coastguard Worker                auto remat_it = ctx.remat.find(op.getTemp());
1000*61046927SAndroid Build Coastguard Worker                if (remat_it != ctx.remat.end()) {
1001*61046927SAndroid Build Coastguard Worker                   ctx.unused_remats.erase(remat_it->second.instr);
1002*61046927SAndroid Build Coastguard Worker                }
1003*61046927SAndroid Build Coastguard Worker             }
1004*61046927SAndroid Build Coastguard Worker          }
1005*61046927SAndroid Build Coastguard Worker       }
1006*61046927SAndroid Build Coastguard Worker 
1007*61046927SAndroid Build Coastguard Worker       /* add reloads and instruction to new instructions */
1008*61046927SAndroid Build Coastguard Worker       for (std::pair<const Temp, std::pair<Temp, uint32_t>>& pair : reloads) {
1009*61046927SAndroid Build Coastguard Worker          aco_ptr<Instruction> reload =
1010*61046927SAndroid Build Coastguard Worker             do_reload(ctx, pair.second.first, pair.first, pair.second.second);
1011*61046927SAndroid Build Coastguard Worker          instructions.emplace_back(std::move(reload));
1012*61046927SAndroid Build Coastguard Worker       }
1013*61046927SAndroid Build Coastguard Worker       instructions.emplace_back(std::move(instr));
1014*61046927SAndroid Build Coastguard Worker       idx++;
1015*61046927SAndroid Build Coastguard Worker    }
1016*61046927SAndroid Build Coastguard Worker 
1017*61046927SAndroid Build Coastguard Worker    block->instructions = std::move(instructions);
1018*61046927SAndroid Build Coastguard Worker }
1019*61046927SAndroid Build Coastguard Worker 
1020*61046927SAndroid Build Coastguard Worker void
spill_block(spill_ctx & ctx,unsigned block_idx)1021*61046927SAndroid Build Coastguard Worker spill_block(spill_ctx& ctx, unsigned block_idx)
1022*61046927SAndroid Build Coastguard Worker {
1023*61046927SAndroid Build Coastguard Worker    Block* block = &ctx.program->blocks[block_idx];
1024*61046927SAndroid Build Coastguard Worker 
1025*61046927SAndroid Build Coastguard Worker    /* determine set of variables which are spilled at the beginning of the block */
1026*61046927SAndroid Build Coastguard Worker    RegisterDemand spilled_registers = init_live_in_vars(ctx, block, block_idx);
1027*61046927SAndroid Build Coastguard Worker 
1028*61046927SAndroid Build Coastguard Worker    if (!(block->kind & block_kind_loop_header)) {
1029*61046927SAndroid Build Coastguard Worker       /* add spill/reload code on incoming control flow edges */
1030*61046927SAndroid Build Coastguard Worker       add_coupling_code(ctx, block, ctx.program->live.live_in[block_idx]);
1031*61046927SAndroid Build Coastguard Worker    }
1032*61046927SAndroid Build Coastguard Worker 
1033*61046927SAndroid Build Coastguard Worker    assert(ctx.spills_exit[block_idx].empty());
1034*61046927SAndroid Build Coastguard Worker    ctx.spills_exit[block_idx] = ctx.spills_entry[block_idx];
1035*61046927SAndroid Build Coastguard Worker    process_block(ctx, block_idx, block, spilled_registers);
1036*61046927SAndroid Build Coastguard Worker 
1037*61046927SAndroid Build Coastguard Worker    ctx.processed[block_idx] = true;
1038*61046927SAndroid Build Coastguard Worker 
1039*61046927SAndroid Build Coastguard Worker    /* check if the next block leaves the current loop */
1040*61046927SAndroid Build Coastguard Worker    if (block->loop_nest_depth == 0 ||
1041*61046927SAndroid Build Coastguard Worker        ctx.program->blocks[block_idx + 1].loop_nest_depth >= block->loop_nest_depth)
1042*61046927SAndroid Build Coastguard Worker       return;
1043*61046927SAndroid Build Coastguard Worker 
1044*61046927SAndroid Build Coastguard Worker    uint32_t loop_header_idx = ctx.loop.back().index;
1045*61046927SAndroid Build Coastguard Worker 
1046*61046927SAndroid Build Coastguard Worker    /* preserve original renames at end of loop header block */
1047*61046927SAndroid Build Coastguard Worker    aco::map<Temp, Temp> renames = std::move(ctx.renames[loop_header_idx]);
1048*61046927SAndroid Build Coastguard Worker 
1049*61046927SAndroid Build Coastguard Worker    /* add coupling code to all loop header predecessors */
1050*61046927SAndroid Build Coastguard Worker    for (unsigned t : ctx.loop.back().live_in)
1051*61046927SAndroid Build Coastguard Worker       ctx.ssa_infos[t].num_uses--;
1052*61046927SAndroid Build Coastguard Worker    add_coupling_code(ctx, &ctx.program->blocks[loop_header_idx], ctx.loop.back().live_in);
1053*61046927SAndroid Build Coastguard Worker    renames.swap(ctx.renames[loop_header_idx]);
1054*61046927SAndroid Build Coastguard Worker 
1055*61046927SAndroid Build Coastguard Worker    /* remove loop header info from stack */
1056*61046927SAndroid Build Coastguard Worker    ctx.loop.pop_back();
1057*61046927SAndroid Build Coastguard Worker    if (renames.empty())
1058*61046927SAndroid Build Coastguard Worker       return;
1059*61046927SAndroid Build Coastguard Worker 
1060*61046927SAndroid Build Coastguard Worker    /* Add the new renames to each block */
1061*61046927SAndroid Build Coastguard Worker    for (std::pair<Temp, Temp> rename : renames) {
1062*61046927SAndroid Build Coastguard Worker       /* If there is already a rename, don't overwrite it. */
1063*61046927SAndroid Build Coastguard Worker       for (unsigned idx = loop_header_idx; idx <= block_idx; idx++)
1064*61046927SAndroid Build Coastguard Worker          ctx.renames[idx].insert(rename);
1065*61046927SAndroid Build Coastguard Worker    }
1066*61046927SAndroid Build Coastguard Worker 
1067*61046927SAndroid Build Coastguard Worker    /* propagate new renames through loop: i.e. repair the SSA */
1068*61046927SAndroid Build Coastguard Worker    for (unsigned idx = loop_header_idx; idx <= block_idx; idx++) {
1069*61046927SAndroid Build Coastguard Worker       Block& current = ctx.program->blocks[idx];
1070*61046927SAndroid Build Coastguard Worker       /* rename all uses in this block */
1071*61046927SAndroid Build Coastguard Worker       for (aco_ptr<Instruction>& instr : current.instructions) {
1072*61046927SAndroid Build Coastguard Worker          /* no need to rename the loop header phis once again. */
1073*61046927SAndroid Build Coastguard Worker          if (idx == loop_header_idx && is_phi(instr))
1074*61046927SAndroid Build Coastguard Worker             continue;
1075*61046927SAndroid Build Coastguard Worker 
1076*61046927SAndroid Build Coastguard Worker          for (Operand& op : instr->operands) {
1077*61046927SAndroid Build Coastguard Worker             if (!op.isTemp())
1078*61046927SAndroid Build Coastguard Worker                continue;
1079*61046927SAndroid Build Coastguard Worker 
1080*61046927SAndroid Build Coastguard Worker             auto rename = renames.find(op.getTemp());
1081*61046927SAndroid Build Coastguard Worker             if (rename != renames.end())
1082*61046927SAndroid Build Coastguard Worker                op.setTemp(rename->second);
1083*61046927SAndroid Build Coastguard Worker          }
1084*61046927SAndroid Build Coastguard Worker       }
1085*61046927SAndroid Build Coastguard Worker    }
1086*61046927SAndroid Build Coastguard Worker }
1087*61046927SAndroid Build Coastguard Worker 
1088*61046927SAndroid Build Coastguard Worker Temp
load_scratch_resource(spill_ctx & ctx,Builder & bld,bool apply_scratch_offset)1089*61046927SAndroid Build Coastguard Worker load_scratch_resource(spill_ctx& ctx, Builder& bld, bool apply_scratch_offset)
1090*61046927SAndroid Build Coastguard Worker {
1091*61046927SAndroid Build Coastguard Worker    Temp private_segment_buffer = ctx.program->private_segment_buffer;
1092*61046927SAndroid Build Coastguard Worker    if (!private_segment_buffer.bytes()) {
1093*61046927SAndroid Build Coastguard Worker       Temp addr_lo =
1094*61046927SAndroid Build Coastguard Worker          bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
1095*61046927SAndroid Build Coastguard Worker       Temp addr_hi =
1096*61046927SAndroid Build Coastguard Worker          bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
1097*61046927SAndroid Build Coastguard Worker       private_segment_buffer =
1098*61046927SAndroid Build Coastguard Worker          bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
1099*61046927SAndroid Build Coastguard Worker    } else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
1100*61046927SAndroid Build Coastguard Worker       private_segment_buffer =
1101*61046927SAndroid Build Coastguard Worker          bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
1102*61046927SAndroid Build Coastguard Worker    }
1103*61046927SAndroid Build Coastguard Worker 
1104*61046927SAndroid Build Coastguard Worker    if (apply_scratch_offset) {
1105*61046927SAndroid Build Coastguard Worker       Temp addr_lo = bld.tmp(s1);
1106*61046927SAndroid Build Coastguard Worker       Temp addr_hi = bld.tmp(s1);
1107*61046927SAndroid Build Coastguard Worker       bld.pseudo(aco_opcode::p_split_vector, Definition(addr_lo), Definition(addr_hi),
1108*61046927SAndroid Build Coastguard Worker                  private_segment_buffer);
1109*61046927SAndroid Build Coastguard Worker 
1110*61046927SAndroid Build Coastguard Worker       Temp carry = bld.tmp(s1);
1111*61046927SAndroid Build Coastguard Worker       addr_lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), addr_lo,
1112*61046927SAndroid Build Coastguard Worker                          ctx.program->scratch_offset);
1113*61046927SAndroid Build Coastguard Worker       addr_hi = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), addr_hi,
1114*61046927SAndroid Build Coastguard Worker                          Operand::c32(0), bld.scc(carry));
1115*61046927SAndroid Build Coastguard Worker 
1116*61046927SAndroid Build Coastguard Worker       private_segment_buffer =
1117*61046927SAndroid Build Coastguard Worker          bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
1118*61046927SAndroid Build Coastguard Worker    }
1119*61046927SAndroid Build Coastguard Worker 
1120*61046927SAndroid Build Coastguard Worker    struct ac_buffer_state ac_state = {0};
1121*61046927SAndroid Build Coastguard Worker    uint32_t desc[4];
1122*61046927SAndroid Build Coastguard Worker 
1123*61046927SAndroid Build Coastguard Worker    ac_state.size = 0xffffffff;
1124*61046927SAndroid Build Coastguard Worker    ac_state.format = PIPE_FORMAT_R32_FLOAT;
1125*61046927SAndroid Build Coastguard Worker    for (int i = 0; i < 4; i++)
1126*61046927SAndroid Build Coastguard Worker       ac_state.swizzle[i] = PIPE_SWIZZLE_0;
1127*61046927SAndroid Build Coastguard Worker    /* older generations need element size = 4 bytes. element size removed in GFX9 */
1128*61046927SAndroid Build Coastguard Worker    ac_state.element_size = ctx.program->gfx_level <= GFX8 ? 1u : 0u;
1129*61046927SAndroid Build Coastguard Worker    ac_state.index_stride = ctx.program->wave_size == 64 ? 3u : 2u;
1130*61046927SAndroid Build Coastguard Worker    ac_state.add_tid = true;
1131*61046927SAndroid Build Coastguard Worker    ac_state.gfx10_oob_select = V_008F0C_OOB_SELECT_RAW;
1132*61046927SAndroid Build Coastguard Worker 
1133*61046927SAndroid Build Coastguard Worker    ac_build_buffer_descriptor(ctx.program->gfx_level, &ac_state, desc);
1134*61046927SAndroid Build Coastguard Worker 
1135*61046927SAndroid Build Coastguard Worker    return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
1136*61046927SAndroid Build Coastguard Worker                      Operand::c32(desc[2]), Operand::c32(desc[3]));
1137*61046927SAndroid Build Coastguard Worker }
1138*61046927SAndroid Build Coastguard Worker 
1139*61046927SAndroid Build Coastguard Worker void
setup_vgpr_spill_reload(spill_ctx & ctx,Block & block,std::vector<aco_ptr<Instruction>> & instructions,uint32_t spill_slot,Temp & scratch_offset,unsigned * offset)1140*61046927SAndroid Build Coastguard Worker setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
1141*61046927SAndroid Build Coastguard Worker                         std::vector<aco_ptr<Instruction>>& instructions, uint32_t spill_slot,
1142*61046927SAndroid Build Coastguard Worker                         Temp& scratch_offset, unsigned* offset)
1143*61046927SAndroid Build Coastguard Worker {
1144*61046927SAndroid Build Coastguard Worker    uint32_t scratch_size = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
1145*61046927SAndroid Build Coastguard Worker 
1146*61046927SAndroid Build Coastguard Worker    uint32_t offset_range;
1147*61046927SAndroid Build Coastguard Worker    if (ctx.program->gfx_level >= GFX9) {
1148*61046927SAndroid Build Coastguard Worker       offset_range =
1149*61046927SAndroid Build Coastguard Worker          ctx.program->dev.scratch_global_offset_max - ctx.program->dev.scratch_global_offset_min;
1150*61046927SAndroid Build Coastguard Worker    } else {
1151*61046927SAndroid Build Coastguard Worker       if (scratch_size < 4095)
1152*61046927SAndroid Build Coastguard Worker          offset_range = 4095 - scratch_size;
1153*61046927SAndroid Build Coastguard Worker       else
1154*61046927SAndroid Build Coastguard Worker          offset_range = 0;
1155*61046927SAndroid Build Coastguard Worker    }
1156*61046927SAndroid Build Coastguard Worker 
1157*61046927SAndroid Build Coastguard Worker    bool overflow = (ctx.vgpr_spill_slots - 1) * 4 > offset_range;
1158*61046927SAndroid Build Coastguard Worker 
1159*61046927SAndroid Build Coastguard Worker    Builder rsrc_bld(ctx.program);
1160*61046927SAndroid Build Coastguard Worker    if (block.kind & block_kind_top_level) {
1161*61046927SAndroid Build Coastguard Worker       rsrc_bld.reset(&instructions);
1162*61046927SAndroid Build Coastguard Worker    } else if (ctx.scratch_rsrc == Temp() && (!overflow || ctx.program->gfx_level < GFX9)) {
1163*61046927SAndroid Build Coastguard Worker       Block* tl_block = &block;
1164*61046927SAndroid Build Coastguard Worker       while (!(tl_block->kind & block_kind_top_level))
1165*61046927SAndroid Build Coastguard Worker          tl_block = &ctx.program->blocks[tl_block->linear_idom];
1166*61046927SAndroid Build Coastguard Worker 
1167*61046927SAndroid Build Coastguard Worker       /* find p_logical_end */
1168*61046927SAndroid Build Coastguard Worker       std::vector<aco_ptr<Instruction>>& prev_instructions = tl_block->instructions;
1169*61046927SAndroid Build Coastguard Worker       unsigned idx = prev_instructions.size() - 1;
1170*61046927SAndroid Build Coastguard Worker       while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
1171*61046927SAndroid Build Coastguard Worker          idx--;
1172*61046927SAndroid Build Coastguard Worker       rsrc_bld.reset(&prev_instructions, std::next(prev_instructions.begin(), idx));
1173*61046927SAndroid Build Coastguard Worker    }
1174*61046927SAndroid Build Coastguard Worker 
1175*61046927SAndroid Build Coastguard Worker    /* If spilling overflows the constant offset range at any point, we need to emit the soffset
1176*61046927SAndroid Build Coastguard Worker     * before every spill/reload to avoid increasing register demand.
1177*61046927SAndroid Build Coastguard Worker     */
1178*61046927SAndroid Build Coastguard Worker    Builder offset_bld = rsrc_bld;
1179*61046927SAndroid Build Coastguard Worker    if (overflow)
1180*61046927SAndroid Build Coastguard Worker       offset_bld.reset(&instructions);
1181*61046927SAndroid Build Coastguard Worker 
1182*61046927SAndroid Build Coastguard Worker    *offset = spill_slot * 4;
1183*61046927SAndroid Build Coastguard Worker    if (ctx.program->gfx_level >= GFX9) {
1184*61046927SAndroid Build Coastguard Worker       *offset += ctx.program->dev.scratch_global_offset_min;
1185*61046927SAndroid Build Coastguard Worker 
1186*61046927SAndroid Build Coastguard Worker       if (ctx.scratch_rsrc == Temp() || overflow) {
1187*61046927SAndroid Build Coastguard Worker          int32_t saddr = scratch_size - ctx.program->dev.scratch_global_offset_min;
1188*61046927SAndroid Build Coastguard Worker          if ((int32_t)*offset > (int32_t)ctx.program->dev.scratch_global_offset_max) {
1189*61046927SAndroid Build Coastguard Worker             saddr += (int32_t)*offset;
1190*61046927SAndroid Build Coastguard Worker             *offset = 0;
1191*61046927SAndroid Build Coastguard Worker          }
1192*61046927SAndroid Build Coastguard Worker 
1193*61046927SAndroid Build Coastguard Worker          /* GFX9+ uses scratch_* instructions, which don't use a resource. */
1194*61046927SAndroid Build Coastguard Worker          ctx.scratch_rsrc = offset_bld.copy(offset_bld.def(s1), Operand::c32(saddr));
1195*61046927SAndroid Build Coastguard Worker       }
1196*61046927SAndroid Build Coastguard Worker    } else {
1197*61046927SAndroid Build Coastguard Worker       if (ctx.scratch_rsrc == Temp())
1198*61046927SAndroid Build Coastguard Worker          ctx.scratch_rsrc = load_scratch_resource(ctx, rsrc_bld, overflow);
1199*61046927SAndroid Build Coastguard Worker 
1200*61046927SAndroid Build Coastguard Worker       if (overflow) {
1201*61046927SAndroid Build Coastguard Worker          uint32_t soffset =
1202*61046927SAndroid Build Coastguard Worker             ctx.program->config->scratch_bytes_per_wave + *offset * ctx.program->wave_size;
1203*61046927SAndroid Build Coastguard Worker          *offset = 0;
1204*61046927SAndroid Build Coastguard Worker 
1205*61046927SAndroid Build Coastguard Worker          scratch_offset = offset_bld.copy(offset_bld.def(s1), Operand::c32(soffset));
1206*61046927SAndroid Build Coastguard Worker       } else {
1207*61046927SAndroid Build Coastguard Worker          *offset += scratch_size;
1208*61046927SAndroid Build Coastguard Worker       }
1209*61046927SAndroid Build Coastguard Worker    }
1210*61046927SAndroid Build Coastguard Worker }
1211*61046927SAndroid Build Coastguard Worker 
1212*61046927SAndroid Build Coastguard Worker void
spill_vgpr(spill_ctx & ctx,Block & block,std::vector<aco_ptr<Instruction>> & instructions,aco_ptr<Instruction> & spill,std::vector<uint32_t> & slots)1213*61046927SAndroid Build Coastguard Worker spill_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& instructions,
1214*61046927SAndroid Build Coastguard Worker            aco_ptr<Instruction>& spill, std::vector<uint32_t>& slots)
1215*61046927SAndroid Build Coastguard Worker {
1216*61046927SAndroid Build Coastguard Worker    ctx.program->config->spilled_vgprs += spill->operands[0].size();
1217*61046927SAndroid Build Coastguard Worker 
1218*61046927SAndroid Build Coastguard Worker    uint32_t spill_id = spill->operands[1].constantValue();
1219*61046927SAndroid Build Coastguard Worker    uint32_t spill_slot = slots[spill_id];
1220*61046927SAndroid Build Coastguard Worker 
1221*61046927SAndroid Build Coastguard Worker    Temp scratch_offset = ctx.program->scratch_offset;
1222*61046927SAndroid Build Coastguard Worker    unsigned offset;
1223*61046927SAndroid Build Coastguard Worker    setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
1224*61046927SAndroid Build Coastguard Worker 
1225*61046927SAndroid Build Coastguard Worker    assert(spill->operands[0].isTemp());
1226*61046927SAndroid Build Coastguard Worker    Temp temp = spill->operands[0].getTemp();
1227*61046927SAndroid Build Coastguard Worker    assert(temp.type() == RegType::vgpr && !temp.is_linear());
1228*61046927SAndroid Build Coastguard Worker 
1229*61046927SAndroid Build Coastguard Worker    Builder bld(ctx.program, &instructions);
1230*61046927SAndroid Build Coastguard Worker    if (temp.size() > 1) {
1231*61046927SAndroid Build Coastguard Worker       Instruction* split{
1232*61046927SAndroid Build Coastguard Worker          create_instruction(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
1233*61046927SAndroid Build Coastguard Worker       split->operands[0] = Operand(temp);
1234*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < temp.size(); i++)
1235*61046927SAndroid Build Coastguard Worker          split->definitions[i] = bld.def(v1);
1236*61046927SAndroid Build Coastguard Worker       bld.insert(split);
1237*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < temp.size(); i++, offset += 4) {
1238*61046927SAndroid Build Coastguard Worker          Temp elem = split->definitions[i].getTemp();
1239*61046927SAndroid Build Coastguard Worker          if (ctx.program->gfx_level >= GFX9) {
1240*61046927SAndroid Build Coastguard Worker             bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, elem,
1241*61046927SAndroid Build Coastguard Worker                         offset, memory_sync_info(storage_vgpr_spill, semantic_private));
1242*61046927SAndroid Build Coastguard Worker          } else {
1243*61046927SAndroid Build Coastguard Worker             Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc,
1244*61046927SAndroid Build Coastguard Worker                                            Operand(v1), scratch_offset, elem, offset, false);
1245*61046927SAndroid Build Coastguard Worker             instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1246*61046927SAndroid Build Coastguard Worker             instr->mubuf().cache.value = ac_swizzled;
1247*61046927SAndroid Build Coastguard Worker          }
1248*61046927SAndroid Build Coastguard Worker       }
1249*61046927SAndroid Build Coastguard Worker    } else if (ctx.program->gfx_level >= GFX9) {
1250*61046927SAndroid Build Coastguard Worker       bld.scratch(aco_opcode::scratch_store_dword, Operand(v1), ctx.scratch_rsrc, temp, offset,
1251*61046927SAndroid Build Coastguard Worker                   memory_sync_info(storage_vgpr_spill, semantic_private));
1252*61046927SAndroid Build Coastguard Worker    } else {
1253*61046927SAndroid Build Coastguard Worker       Instruction* instr = bld.mubuf(aco_opcode::buffer_store_dword, ctx.scratch_rsrc, Operand(v1),
1254*61046927SAndroid Build Coastguard Worker                                      scratch_offset, temp, offset, false);
1255*61046927SAndroid Build Coastguard Worker       instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1256*61046927SAndroid Build Coastguard Worker       instr->mubuf().cache.value = ac_swizzled;
1257*61046927SAndroid Build Coastguard Worker    }
1258*61046927SAndroid Build Coastguard Worker }
1259*61046927SAndroid Build Coastguard Worker 
1260*61046927SAndroid Build Coastguard Worker void
reload_vgpr(spill_ctx & ctx,Block & block,std::vector<aco_ptr<Instruction>> & instructions,aco_ptr<Instruction> & reload,std::vector<uint32_t> & slots)1261*61046927SAndroid Build Coastguard Worker reload_vgpr(spill_ctx& ctx, Block& block, std::vector<aco_ptr<Instruction>>& instructions,
1262*61046927SAndroid Build Coastguard Worker             aco_ptr<Instruction>& reload, std::vector<uint32_t>& slots)
1263*61046927SAndroid Build Coastguard Worker {
1264*61046927SAndroid Build Coastguard Worker    uint32_t spill_id = reload->operands[0].constantValue();
1265*61046927SAndroid Build Coastguard Worker    uint32_t spill_slot = slots[spill_id];
1266*61046927SAndroid Build Coastguard Worker 
1267*61046927SAndroid Build Coastguard Worker    Temp scratch_offset = ctx.program->scratch_offset;
1268*61046927SAndroid Build Coastguard Worker    unsigned offset;
1269*61046927SAndroid Build Coastguard Worker    setup_vgpr_spill_reload(ctx, block, instructions, spill_slot, scratch_offset, &offset);
1270*61046927SAndroid Build Coastguard Worker 
1271*61046927SAndroid Build Coastguard Worker    Definition def = reload->definitions[0];
1272*61046927SAndroid Build Coastguard Worker 
1273*61046927SAndroid Build Coastguard Worker    Builder bld(ctx.program, &instructions);
1274*61046927SAndroid Build Coastguard Worker    if (def.size() > 1) {
1275*61046927SAndroid Build Coastguard Worker       Instruction* vec{
1276*61046927SAndroid Build Coastguard Worker          create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
1277*61046927SAndroid Build Coastguard Worker       vec->definitions[0] = def;
1278*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < def.size(); i++, offset += 4) {
1279*61046927SAndroid Build Coastguard Worker          Temp tmp = bld.tmp(v1);
1280*61046927SAndroid Build Coastguard Worker          vec->operands[i] = Operand(tmp);
1281*61046927SAndroid Build Coastguard Worker          if (ctx.program->gfx_level >= GFX9) {
1282*61046927SAndroid Build Coastguard Worker             bld.scratch(aco_opcode::scratch_load_dword, Definition(tmp), Operand(v1),
1283*61046927SAndroid Build Coastguard Worker                         ctx.scratch_rsrc, offset,
1284*61046927SAndroid Build Coastguard Worker                         memory_sync_info(storage_vgpr_spill, semantic_private));
1285*61046927SAndroid Build Coastguard Worker          } else {
1286*61046927SAndroid Build Coastguard Worker             Instruction* instr =
1287*61046927SAndroid Build Coastguard Worker                bld.mubuf(aco_opcode::buffer_load_dword, Definition(tmp), ctx.scratch_rsrc,
1288*61046927SAndroid Build Coastguard Worker                          Operand(v1), scratch_offset, offset, false);
1289*61046927SAndroid Build Coastguard Worker             instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1290*61046927SAndroid Build Coastguard Worker             instr->mubuf().cache.value = ac_swizzled;
1291*61046927SAndroid Build Coastguard Worker          }
1292*61046927SAndroid Build Coastguard Worker       }
1293*61046927SAndroid Build Coastguard Worker       bld.insert(vec);
1294*61046927SAndroid Build Coastguard Worker    } else if (ctx.program->gfx_level >= GFX9) {
1295*61046927SAndroid Build Coastguard Worker       bld.scratch(aco_opcode::scratch_load_dword, def, Operand(v1), ctx.scratch_rsrc, offset,
1296*61046927SAndroid Build Coastguard Worker                   memory_sync_info(storage_vgpr_spill, semantic_private));
1297*61046927SAndroid Build Coastguard Worker    } else {
1298*61046927SAndroid Build Coastguard Worker       Instruction* instr = bld.mubuf(aco_opcode::buffer_load_dword, def, ctx.scratch_rsrc,
1299*61046927SAndroid Build Coastguard Worker                                      Operand(v1), scratch_offset, offset, false);
1300*61046927SAndroid Build Coastguard Worker       instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
1301*61046927SAndroid Build Coastguard Worker       instr->mubuf().cache.value = ac_swizzled;
1302*61046927SAndroid Build Coastguard Worker    }
1303*61046927SAndroid Build Coastguard Worker }
1304*61046927SAndroid Build Coastguard Worker 
1305*61046927SAndroid Build Coastguard Worker void
add_interferences(spill_ctx & ctx,std::vector<bool> & is_assigned,std::vector<uint32_t> & slots,std::vector<bool> & slots_used,unsigned id)1306*61046927SAndroid Build Coastguard Worker add_interferences(spill_ctx& ctx, std::vector<bool>& is_assigned, std::vector<uint32_t>& slots,
1307*61046927SAndroid Build Coastguard Worker                   std::vector<bool>& slots_used, unsigned id)
1308*61046927SAndroid Build Coastguard Worker {
1309*61046927SAndroid Build Coastguard Worker    for (unsigned other : ctx.interferences[id].second) {
1310*61046927SAndroid Build Coastguard Worker       if (!is_assigned[other])
1311*61046927SAndroid Build Coastguard Worker          continue;
1312*61046927SAndroid Build Coastguard Worker 
1313*61046927SAndroid Build Coastguard Worker       RegClass other_rc = ctx.interferences[other].first;
1314*61046927SAndroid Build Coastguard Worker       unsigned slot = slots[other];
1315*61046927SAndroid Build Coastguard Worker       std::fill(slots_used.begin() + slot, slots_used.begin() + slot + other_rc.size(), true);
1316*61046927SAndroid Build Coastguard Worker    }
1317*61046927SAndroid Build Coastguard Worker }
1318*61046927SAndroid Build Coastguard Worker 
/* Return the lowest slot index at which `size` consecutive slots are not marked in `used`.
 *
 * Slots beyond the end of `used` count as free. For SGPRs, a range is rejected if its start
 * lies too close to the end of a group of `wave_size` slots (start modulo wave_size greater
 * than wave_size - size); the search then continues at the next multiple of `wave_size`.
 *
 * On return, all markers in `used` have been reset to false and `used` has been grown, if
 * necessary, so that it covers the returned range.
 */
unsigned
find_available_slot(std::vector<bool>& used, unsigned wave_size, unsigned size, bool is_sgpr)
{
   /* The masking below assumes that wave_size is a power of two. */
   const unsigned wave_mask = wave_size - 1;

   /* True if none of the `size` slots starting at `first` is marked as used. */
   const auto range_is_free = [&used, size](unsigned first) {
      for (unsigned i = 0; i < size; i++) {
         const unsigned idx = first + i;
         if (idx < used.size() && used[idx])
            return false;
      }
      return true;
   };

   unsigned candidate = 0;
   for (;;) {
      if (!range_is_free(candidate)) {
         candidate++;
      } else if (is_sgpr && (candidate & wave_mask) > wave_size - size) {
         /* Round up to the next multiple of wave_size. */
         candidate = (candidate + wave_mask) & ~wave_mask;
      } else {
         break;
      }
   }

   /* Reset the markers for the next query and make sure the chosen range is covered. */
   std::fill(used.begin(), used.end(), false);
   if (candidate + size > used.size())
      used.resize(candidate + size);

   return candidate;
}
1351*61046927SAndroid Build Coastguard Worker 
1352*61046927SAndroid Build Coastguard Worker void
assign_spill_slots_helper(spill_ctx & ctx,RegType type,std::vector<bool> & is_assigned,std::vector<uint32_t> & slots,unsigned * num_slots)1353*61046927SAndroid Build Coastguard Worker assign_spill_slots_helper(spill_ctx& ctx, RegType type, std::vector<bool>& is_assigned,
1354*61046927SAndroid Build Coastguard Worker                           std::vector<uint32_t>& slots, unsigned* num_slots)
1355*61046927SAndroid Build Coastguard Worker {
1356*61046927SAndroid Build Coastguard Worker    std::vector<bool> slots_used;
1357*61046927SAndroid Build Coastguard Worker 
1358*61046927SAndroid Build Coastguard Worker    /* assign slots for ids with affinities first */
1359*61046927SAndroid Build Coastguard Worker    for (std::vector<uint32_t>& vec : ctx.affinities) {
1360*61046927SAndroid Build Coastguard Worker       if (ctx.interferences[vec[0]].first.type() != type)
1361*61046927SAndroid Build Coastguard Worker          continue;
1362*61046927SAndroid Build Coastguard Worker 
1363*61046927SAndroid Build Coastguard Worker       for (unsigned id : vec) {
1364*61046927SAndroid Build Coastguard Worker          if (!ctx.is_reloaded[id])
1365*61046927SAndroid Build Coastguard Worker             continue;
1366*61046927SAndroid Build Coastguard Worker 
1367*61046927SAndroid Build Coastguard Worker          add_interferences(ctx, is_assigned, slots, slots_used, id);
1368*61046927SAndroid Build Coastguard Worker       }
1369*61046927SAndroid Build Coastguard Worker 
1370*61046927SAndroid Build Coastguard Worker       unsigned slot = find_available_slot(
1371*61046927SAndroid Build Coastguard Worker          slots_used, ctx.wave_size, ctx.interferences[vec[0]].first.size(), type == RegType::sgpr);
1372*61046927SAndroid Build Coastguard Worker 
1373*61046927SAndroid Build Coastguard Worker       for (unsigned id : vec) {
1374*61046927SAndroid Build Coastguard Worker          assert(!is_assigned[id]);
1375*61046927SAndroid Build Coastguard Worker 
1376*61046927SAndroid Build Coastguard Worker          if (ctx.is_reloaded[id]) {
1377*61046927SAndroid Build Coastguard Worker             slots[id] = slot;
1378*61046927SAndroid Build Coastguard Worker             is_assigned[id] = true;
1379*61046927SAndroid Build Coastguard Worker          }
1380*61046927SAndroid Build Coastguard Worker       }
1381*61046927SAndroid Build Coastguard Worker    }
1382*61046927SAndroid Build Coastguard Worker 
1383*61046927SAndroid Build Coastguard Worker    /* assign slots for ids without affinities */
1384*61046927SAndroid Build Coastguard Worker    for (unsigned id = 0; id < ctx.interferences.size(); id++) {
1385*61046927SAndroid Build Coastguard Worker       if (is_assigned[id] || !ctx.is_reloaded[id] || ctx.interferences[id].first.type() != type)
1386*61046927SAndroid Build Coastguard Worker          continue;
1387*61046927SAndroid Build Coastguard Worker 
1388*61046927SAndroid Build Coastguard Worker       add_interferences(ctx, is_assigned, slots, slots_used, id);
1389*61046927SAndroid Build Coastguard Worker 
1390*61046927SAndroid Build Coastguard Worker       unsigned slot = find_available_slot(
1391*61046927SAndroid Build Coastguard Worker          slots_used, ctx.wave_size, ctx.interferences[id].first.size(), type == RegType::sgpr);
1392*61046927SAndroid Build Coastguard Worker 
1393*61046927SAndroid Build Coastguard Worker       slots[id] = slot;
1394*61046927SAndroid Build Coastguard Worker       is_assigned[id] = true;
1395*61046927SAndroid Build Coastguard Worker    }
1396*61046927SAndroid Build Coastguard Worker 
1397*61046927SAndroid Build Coastguard Worker    *num_slots = slots_used.size();
1398*61046927SAndroid Build Coastguard Worker }
1399*61046927SAndroid Build Coastguard Worker 
1400*61046927SAndroid Build Coastguard Worker void
end_unused_spill_vgprs(spill_ctx & ctx,Block & block,std::vector<Temp> & vgpr_spill_temps,const std::vector<uint32_t> & slots,const aco::unordered_map<Temp,uint32_t> & spills)1401*61046927SAndroid Build Coastguard Worker end_unused_spill_vgprs(spill_ctx& ctx, Block& block, std::vector<Temp>& vgpr_spill_temps,
1402*61046927SAndroid Build Coastguard Worker                        const std::vector<uint32_t>& slots,
1403*61046927SAndroid Build Coastguard Worker                        const aco::unordered_map<Temp, uint32_t>& spills)
1404*61046927SAndroid Build Coastguard Worker {
1405*61046927SAndroid Build Coastguard Worker    std::vector<bool> is_used(vgpr_spill_temps.size());
1406*61046927SAndroid Build Coastguard Worker    for (std::pair<Temp, uint32_t> pair : spills) {
1407*61046927SAndroid Build Coastguard Worker       if (pair.first.type() == RegType::sgpr && ctx.is_reloaded[pair.second])
1408*61046927SAndroid Build Coastguard Worker          is_used[slots[pair.second] / ctx.wave_size] = true;
1409*61046927SAndroid Build Coastguard Worker    }
1410*61046927SAndroid Build Coastguard Worker 
1411*61046927SAndroid Build Coastguard Worker    std::vector<Temp> temps;
1412*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < vgpr_spill_temps.size(); i++) {
1413*61046927SAndroid Build Coastguard Worker       if (vgpr_spill_temps[i].id() && !is_used[i]) {
1414*61046927SAndroid Build Coastguard Worker          temps.push_back(vgpr_spill_temps[i]);
1415*61046927SAndroid Build Coastguard Worker          vgpr_spill_temps[i] = Temp();
1416*61046927SAndroid Build Coastguard Worker       }
1417*61046927SAndroid Build Coastguard Worker    }
1418*61046927SAndroid Build Coastguard Worker    if (temps.empty() || block.linear_preds.empty())
1419*61046927SAndroid Build Coastguard Worker       return;
1420*61046927SAndroid Build Coastguard Worker 
1421*61046927SAndroid Build Coastguard Worker    aco_ptr<Instruction> destr{
1422*61046927SAndroid Build Coastguard Worker       create_instruction(aco_opcode::p_end_linear_vgpr, Format::PSEUDO, temps.size(), 0)};
1423*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < temps.size(); i++)
1424*61046927SAndroid Build Coastguard Worker       destr->operands[i] = Operand(temps[i]);
1425*61046927SAndroid Build Coastguard Worker 
1426*61046927SAndroid Build Coastguard Worker    std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
1427*61046927SAndroid Build Coastguard Worker    while (is_phi(*it))
1428*61046927SAndroid Build Coastguard Worker       ++it;
1429*61046927SAndroid Build Coastguard Worker    block.instructions.insert(it, std::move(destr));
1430*61046927SAndroid Build Coastguard Worker }
1431*61046927SAndroid Build Coastguard Worker 
1432*61046927SAndroid Build Coastguard Worker void
assign_spill_slots(spill_ctx & ctx,unsigned spills_to_vgpr)1433*61046927SAndroid Build Coastguard Worker assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
1434*61046927SAndroid Build Coastguard Worker {
1435*61046927SAndroid Build Coastguard Worker    std::vector<uint32_t> slots(ctx.interferences.size());
1436*61046927SAndroid Build Coastguard Worker    std::vector<bool> is_assigned(ctx.interferences.size());
1437*61046927SAndroid Build Coastguard Worker 
1438*61046927SAndroid Build Coastguard Worker    /* first, handle affinities: just merge all interferences into both spill ids */
1439*61046927SAndroid Build Coastguard Worker    for (std::vector<uint32_t>& vec : ctx.affinities) {
1440*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < vec.size(); i++) {
1441*61046927SAndroid Build Coastguard Worker          for (unsigned j = i + 1; j < vec.size(); j++) {
1442*61046927SAndroid Build Coastguard Worker             assert(vec[i] != vec[j]);
1443*61046927SAndroid Build Coastguard Worker             bool reloaded = ctx.is_reloaded[vec[i]] || ctx.is_reloaded[vec[j]];
1444*61046927SAndroid Build Coastguard Worker             ctx.is_reloaded[vec[i]] = reloaded;
1445*61046927SAndroid Build Coastguard Worker             ctx.is_reloaded[vec[j]] = reloaded;
1446*61046927SAndroid Build Coastguard Worker          }
1447*61046927SAndroid Build Coastguard Worker       }
1448*61046927SAndroid Build Coastguard Worker    }
1449*61046927SAndroid Build Coastguard Worker    for (ASSERTED uint32_t i = 0; i < ctx.interferences.size(); i++)
1450*61046927SAndroid Build Coastguard Worker       for (ASSERTED uint32_t id : ctx.interferences[i].second)
1451*61046927SAndroid Build Coastguard Worker          assert(i != id);
1452*61046927SAndroid Build Coastguard Worker 
1453*61046927SAndroid Build Coastguard Worker    /* for each spill slot, assign as many spill ids as possible */
1454*61046927SAndroid Build Coastguard Worker    assign_spill_slots_helper(ctx, RegType::sgpr, is_assigned, slots, &ctx.sgpr_spill_slots);
1455*61046927SAndroid Build Coastguard Worker    assign_spill_slots_helper(ctx, RegType::vgpr, is_assigned, slots, &ctx.vgpr_spill_slots);
1456*61046927SAndroid Build Coastguard Worker 
1457*61046927SAndroid Build Coastguard Worker    for (unsigned id = 0; id < is_assigned.size(); id++)
1458*61046927SAndroid Build Coastguard Worker       assert(is_assigned[id] || !ctx.is_reloaded[id]);
1459*61046927SAndroid Build Coastguard Worker 
1460*61046927SAndroid Build Coastguard Worker    for (std::vector<uint32_t>& vec : ctx.affinities) {
1461*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < vec.size(); i++) {
1462*61046927SAndroid Build Coastguard Worker          for (unsigned j = i + 1; j < vec.size(); j++) {
1463*61046927SAndroid Build Coastguard Worker             assert(is_assigned[vec[i]] == is_assigned[vec[j]]);
1464*61046927SAndroid Build Coastguard Worker             if (!is_assigned[vec[i]])
1465*61046927SAndroid Build Coastguard Worker                continue;
1466*61046927SAndroid Build Coastguard Worker             assert(ctx.is_reloaded[vec[i]] == ctx.is_reloaded[vec[j]]);
1467*61046927SAndroid Build Coastguard Worker             assert(ctx.interferences[vec[i]].first.type() ==
1468*61046927SAndroid Build Coastguard Worker                    ctx.interferences[vec[j]].first.type());
1469*61046927SAndroid Build Coastguard Worker             assert(slots[vec[i]] == slots[vec[j]]);
1470*61046927SAndroid Build Coastguard Worker          }
1471*61046927SAndroid Build Coastguard Worker       }
1472*61046927SAndroid Build Coastguard Worker    }
1473*61046927SAndroid Build Coastguard Worker 
1474*61046927SAndroid Build Coastguard Worker    /* hope, we didn't mess up */
1475*61046927SAndroid Build Coastguard Worker    std::vector<Temp> vgpr_spill_temps((ctx.sgpr_spill_slots + ctx.wave_size - 1) / ctx.wave_size);
1476*61046927SAndroid Build Coastguard Worker    assert(vgpr_spill_temps.size() <= spills_to_vgpr);
1477*61046927SAndroid Build Coastguard Worker 
1478*61046927SAndroid Build Coastguard Worker    /* replace pseudo instructions with actual hardware instructions */
1479*61046927SAndroid Build Coastguard Worker    unsigned last_top_level_block_idx = 0;
1480*61046927SAndroid Build Coastguard Worker    for (Block& block : ctx.program->blocks) {
1481*61046927SAndroid Build Coastguard Worker 
1482*61046927SAndroid Build Coastguard Worker       if (block.kind & block_kind_top_level) {
1483*61046927SAndroid Build Coastguard Worker          last_top_level_block_idx = block.index;
1484*61046927SAndroid Build Coastguard Worker 
1485*61046927SAndroid Build Coastguard Worker          end_unused_spill_vgprs(ctx, block, vgpr_spill_temps, slots, ctx.spills_entry[block.index]);
1486*61046927SAndroid Build Coastguard Worker 
1487*61046927SAndroid Build Coastguard Worker          /* If the block has no predecessors (for example in RT resume shaders),
1488*61046927SAndroid Build Coastguard Worker           * we cannot reuse the current scratch_rsrc temp because its definition is unreachable */
1489*61046927SAndroid Build Coastguard Worker          if (block.linear_preds.empty())
1490*61046927SAndroid Build Coastguard Worker             ctx.scratch_rsrc = Temp();
1491*61046927SAndroid Build Coastguard Worker       }
1492*61046927SAndroid Build Coastguard Worker 
1493*61046927SAndroid Build Coastguard Worker       std::vector<aco_ptr<Instruction>>::iterator it;
1494*61046927SAndroid Build Coastguard Worker       std::vector<aco_ptr<Instruction>> instructions;
1495*61046927SAndroid Build Coastguard Worker       instructions.reserve(block.instructions.size());
1496*61046927SAndroid Build Coastguard Worker       Builder bld(ctx.program, &instructions);
1497*61046927SAndroid Build Coastguard Worker       for (it = block.instructions.begin(); it != block.instructions.end(); ++it) {
1498*61046927SAndroid Build Coastguard Worker 
1499*61046927SAndroid Build Coastguard Worker          if ((*it)->opcode == aco_opcode::p_spill) {
1500*61046927SAndroid Build Coastguard Worker             uint32_t spill_id = (*it)->operands[1].constantValue();
1501*61046927SAndroid Build Coastguard Worker 
1502*61046927SAndroid Build Coastguard Worker             if (!ctx.is_reloaded[spill_id]) {
1503*61046927SAndroid Build Coastguard Worker                /* never reloaded, so don't spill */
1504*61046927SAndroid Build Coastguard Worker             } else if (!is_assigned[spill_id]) {
1505*61046927SAndroid Build Coastguard Worker                unreachable("No spill slot assigned for spill id");
1506*61046927SAndroid Build Coastguard Worker             } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
1507*61046927SAndroid Build Coastguard Worker                spill_vgpr(ctx, block, instructions, *it, slots);
1508*61046927SAndroid Build Coastguard Worker             } else {
1509*61046927SAndroid Build Coastguard Worker                ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
1510*61046927SAndroid Build Coastguard Worker 
1511*61046927SAndroid Build Coastguard Worker                uint32_t spill_slot = slots[spill_id];
1512*61046927SAndroid Build Coastguard Worker 
1513*61046927SAndroid Build Coastguard Worker                /* check if the linear vgpr already exists */
1514*61046927SAndroid Build Coastguard Worker                if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
1515*61046927SAndroid Build Coastguard Worker                   Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
1516*61046927SAndroid Build Coastguard Worker                   vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
1517*61046927SAndroid Build Coastguard Worker                   aco_ptr<Instruction> create{
1518*61046927SAndroid Build Coastguard Worker                      create_instruction(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1519*61046927SAndroid Build Coastguard Worker                   create->definitions[0] = Definition(linear_vgpr);
1520*61046927SAndroid Build Coastguard Worker                   /* find the right place to insert this definition */
1521*61046927SAndroid Build Coastguard Worker                   if (last_top_level_block_idx == block.index) {
1522*61046927SAndroid Build Coastguard Worker                      /* insert right before the current instruction */
1523*61046927SAndroid Build Coastguard Worker                      instructions.emplace_back(std::move(create));
1524*61046927SAndroid Build Coastguard Worker                   } else {
1525*61046927SAndroid Build Coastguard Worker                      assert(last_top_level_block_idx < block.index);
1526*61046927SAndroid Build Coastguard Worker                      /* insert after p_logical_end of the last top-level block */
1527*61046927SAndroid Build Coastguard Worker                      std::vector<aco_ptr<Instruction>>& block_instrs =
1528*61046927SAndroid Build Coastguard Worker                         ctx.program->blocks[last_top_level_block_idx].instructions;
1529*61046927SAndroid Build Coastguard Worker                      auto insert_point =
1530*61046927SAndroid Build Coastguard Worker                         std::find_if(block_instrs.rbegin(), block_instrs.rend(),
1531*61046927SAndroid Build Coastguard Worker                                      [](const auto& iter) {
1532*61046927SAndroid Build Coastguard Worker                                         return iter->opcode == aco_opcode::p_logical_end;
1533*61046927SAndroid Build Coastguard Worker                                      })
1534*61046927SAndroid Build Coastguard Worker                            .base();
1535*61046927SAndroid Build Coastguard Worker                      block_instrs.insert(insert_point, std::move(create));
1536*61046927SAndroid Build Coastguard Worker                   }
1537*61046927SAndroid Build Coastguard Worker                }
1538*61046927SAndroid Build Coastguard Worker 
1539*61046927SAndroid Build Coastguard Worker                /* spill sgpr: just add the vgpr temp to operands */
1540*61046927SAndroid Build Coastguard Worker                Instruction* spill = create_instruction(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
1541*61046927SAndroid Build Coastguard Worker                spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
1542*61046927SAndroid Build Coastguard Worker                spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
1543*61046927SAndroid Build Coastguard Worker                spill->operands[2] = (*it)->operands[0];
1544*61046927SAndroid Build Coastguard Worker                instructions.emplace_back(aco_ptr<Instruction>(spill));
1545*61046927SAndroid Build Coastguard Worker             }
1546*61046927SAndroid Build Coastguard Worker 
1547*61046927SAndroid Build Coastguard Worker          } else if ((*it)->opcode == aco_opcode::p_reload) {
1548*61046927SAndroid Build Coastguard Worker             uint32_t spill_id = (*it)->operands[0].constantValue();
1549*61046927SAndroid Build Coastguard Worker             assert(ctx.is_reloaded[spill_id]);
1550*61046927SAndroid Build Coastguard Worker 
1551*61046927SAndroid Build Coastguard Worker             if (!is_assigned[spill_id]) {
1552*61046927SAndroid Build Coastguard Worker                unreachable("No spill slot assigned for spill id");
1553*61046927SAndroid Build Coastguard Worker             } else if (ctx.interferences[spill_id].first.type() == RegType::vgpr) {
1554*61046927SAndroid Build Coastguard Worker                reload_vgpr(ctx, block, instructions, *it, slots);
1555*61046927SAndroid Build Coastguard Worker             } else {
1556*61046927SAndroid Build Coastguard Worker                uint32_t spill_slot = slots[spill_id];
1557*61046927SAndroid Build Coastguard Worker 
1558*61046927SAndroid Build Coastguard Worker                /* check if the linear vgpr already exists */
1559*61046927SAndroid Build Coastguard Worker                if (vgpr_spill_temps[spill_slot / ctx.wave_size] == Temp()) {
1560*61046927SAndroid Build Coastguard Worker                   Temp linear_vgpr = ctx.program->allocateTmp(v1.as_linear());
1561*61046927SAndroid Build Coastguard Worker                   vgpr_spill_temps[spill_slot / ctx.wave_size] = linear_vgpr;
1562*61046927SAndroid Build Coastguard Worker                   aco_ptr<Instruction> create{
1563*61046927SAndroid Build Coastguard Worker                      create_instruction(aco_opcode::p_start_linear_vgpr, Format::PSEUDO, 0, 1)};
1564*61046927SAndroid Build Coastguard Worker                   create->definitions[0] = Definition(linear_vgpr);
1565*61046927SAndroid Build Coastguard Worker                   /* find the right place to insert this definition */
1566*61046927SAndroid Build Coastguard Worker                   if (last_top_level_block_idx == block.index) {
1567*61046927SAndroid Build Coastguard Worker                      /* insert right before the current instruction */
1568*61046927SAndroid Build Coastguard Worker                      instructions.emplace_back(std::move(create));
1569*61046927SAndroid Build Coastguard Worker                   } else {
1570*61046927SAndroid Build Coastguard Worker                      assert(last_top_level_block_idx < block.index);
1571*61046927SAndroid Build Coastguard Worker                      /* insert after p_logical_end of the last top-level block */
1572*61046927SAndroid Build Coastguard Worker                      std::vector<aco_ptr<Instruction>>& block_instrs =
1573*61046927SAndroid Build Coastguard Worker                         ctx.program->blocks[last_top_level_block_idx].instructions;
1574*61046927SAndroid Build Coastguard Worker                      auto insert_point =
1575*61046927SAndroid Build Coastguard Worker                         std::find_if(block_instrs.rbegin(), block_instrs.rend(),
1576*61046927SAndroid Build Coastguard Worker                                      [](const auto& iter) {
1577*61046927SAndroid Build Coastguard Worker                                         return iter->opcode == aco_opcode::p_logical_end;
1578*61046927SAndroid Build Coastguard Worker                                      })
1579*61046927SAndroid Build Coastguard Worker                            .base();
1580*61046927SAndroid Build Coastguard Worker                      block_instrs.insert(insert_point, std::move(create));
1581*61046927SAndroid Build Coastguard Worker                   }
1582*61046927SAndroid Build Coastguard Worker                }
1583*61046927SAndroid Build Coastguard Worker 
1584*61046927SAndroid Build Coastguard Worker                /* reload sgpr: just add the vgpr temp to operands */
1585*61046927SAndroid Build Coastguard Worker                Instruction* reload = create_instruction(aco_opcode::p_reload, Format::PSEUDO, 2, 1);
1586*61046927SAndroid Build Coastguard Worker                reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
1587*61046927SAndroid Build Coastguard Worker                reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
1588*61046927SAndroid Build Coastguard Worker                reload->definitions[0] = (*it)->definitions[0];
1589*61046927SAndroid Build Coastguard Worker                instructions.emplace_back(aco_ptr<Instruction>(reload));
1590*61046927SAndroid Build Coastguard Worker             }
1591*61046927SAndroid Build Coastguard Worker          } else if (!ctx.unused_remats.count(it->get())) {
1592*61046927SAndroid Build Coastguard Worker             instructions.emplace_back(std::move(*it));
1593*61046927SAndroid Build Coastguard Worker          }
1594*61046927SAndroid Build Coastguard Worker       }
1595*61046927SAndroid Build Coastguard Worker       block.instructions = std::move(instructions);
1596*61046927SAndroid Build Coastguard Worker    }
1597*61046927SAndroid Build Coastguard Worker 
1598*61046927SAndroid Build Coastguard Worker    /* update required scratch memory */
1599*61046927SAndroid Build Coastguard Worker    ctx.program->config->scratch_bytes_per_wave += ctx.vgpr_spill_slots * 4 * ctx.program->wave_size;
1600*61046927SAndroid Build Coastguard Worker }
1601*61046927SAndroid Build Coastguard Worker 
1602*61046927SAndroid Build Coastguard Worker } /* end namespace */
1603*61046927SAndroid Build Coastguard Worker 
1604*61046927SAndroid Build Coastguard Worker void
spill(Program * program)1605*61046927SAndroid Build Coastguard Worker spill(Program* program)
1606*61046927SAndroid Build Coastguard Worker {
1607*61046927SAndroid Build Coastguard Worker    program->config->spilled_vgprs = 0;
1608*61046927SAndroid Build Coastguard Worker    program->config->spilled_sgprs = 0;
1609*61046927SAndroid Build Coastguard Worker 
1610*61046927SAndroid Build Coastguard Worker    program->progress = CompilationProgress::after_spilling;
1611*61046927SAndroid Build Coastguard Worker 
1612*61046927SAndroid Build Coastguard Worker    /* no spilling when register pressure is low enough */
1613*61046927SAndroid Build Coastguard Worker    if (program->num_waves > 0)
1614*61046927SAndroid Build Coastguard Worker       return;
1615*61046927SAndroid Build Coastguard Worker 
1616*61046927SAndroid Build Coastguard Worker    /* lower to CSSA before spilling to ensure correctness w.r.t. phis */
1617*61046927SAndroid Build Coastguard Worker    lower_to_cssa(program);
1618*61046927SAndroid Build Coastguard Worker 
1619*61046927SAndroid Build Coastguard Worker    /* calculate target register demand */
1620*61046927SAndroid Build Coastguard Worker    const RegisterDemand demand = program->max_reg_demand; /* current max */
1621*61046927SAndroid Build Coastguard Worker    const uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
1622*61046927SAndroid Build Coastguard Worker    const uint16_t vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
1623*61046927SAndroid Build Coastguard Worker    uint16_t extra_vgprs = 0;
1624*61046927SAndroid Build Coastguard Worker    uint16_t extra_sgprs = 0;
1625*61046927SAndroid Build Coastguard Worker 
1626*61046927SAndroid Build Coastguard Worker    /* calculate extra VGPRs required for spilling SGPRs */
1627*61046927SAndroid Build Coastguard Worker    if (demand.sgpr > sgpr_limit) {
1628*61046927SAndroid Build Coastguard Worker       unsigned sgpr_spills = demand.sgpr - sgpr_limit;
1629*61046927SAndroid Build Coastguard Worker       extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
1630*61046927SAndroid Build Coastguard Worker    }
1631*61046927SAndroid Build Coastguard Worker    /* add extra SGPRs required for spilling VGPRs */
1632*61046927SAndroid Build Coastguard Worker    if (demand.vgpr + extra_vgprs > vgpr_limit) {
1633*61046927SAndroid Build Coastguard Worker       if (program->gfx_level >= GFX9)
1634*61046927SAndroid Build Coastguard Worker          extra_sgprs = 1; /* SADDR */
1635*61046927SAndroid Build Coastguard Worker       else
1636*61046927SAndroid Build Coastguard Worker          extra_sgprs = 5; /* scratch_resource (s4) + scratch_offset (s1) */
1637*61046927SAndroid Build Coastguard Worker       if (demand.sgpr + extra_sgprs > sgpr_limit) {
1638*61046927SAndroid Build Coastguard Worker          /* re-calculate in case something has changed */
1639*61046927SAndroid Build Coastguard Worker          unsigned sgpr_spills = demand.sgpr + extra_sgprs - sgpr_limit;
1640*61046927SAndroid Build Coastguard Worker          extra_vgprs = DIV_ROUND_UP(sgpr_spills * 2, program->wave_size) + 1;
1641*61046927SAndroid Build Coastguard Worker       }
1642*61046927SAndroid Build Coastguard Worker    }
1643*61046927SAndroid Build Coastguard Worker    /* the spiller has to target the following register demand */
1644*61046927SAndroid Build Coastguard Worker    const RegisterDemand target(vgpr_limit - extra_vgprs, sgpr_limit - extra_sgprs);
1645*61046927SAndroid Build Coastguard Worker 
1646*61046927SAndroid Build Coastguard Worker    /* initialize ctx */
1647*61046927SAndroid Build Coastguard Worker    spill_ctx ctx(target, program);
1648*61046927SAndroid Build Coastguard Worker    gather_ssa_use_info(ctx);
1649*61046927SAndroid Build Coastguard Worker    get_rematerialize_info(ctx);
1650*61046927SAndroid Build Coastguard Worker 
1651*61046927SAndroid Build Coastguard Worker    /* create spills and reloads */
1652*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < program->blocks.size(); i++)
1653*61046927SAndroid Build Coastguard Worker       spill_block(ctx, i);
1654*61046927SAndroid Build Coastguard Worker 
1655*61046927SAndroid Build Coastguard Worker    /* assign spill slots and DCE rematerialized code */
1656*61046927SAndroid Build Coastguard Worker    assign_spill_slots(ctx, extra_vgprs);
1657*61046927SAndroid Build Coastguard Worker 
1658*61046927SAndroid Build Coastguard Worker    /* update live variable information */
1659*61046927SAndroid Build Coastguard Worker    live_var_analysis(program);
1660*61046927SAndroid Build Coastguard Worker 
1661*61046927SAndroid Build Coastguard Worker    assert(program->num_waves > 0);
1662*61046927SAndroid Build Coastguard Worker }
1663*61046927SAndroid Build Coastguard Worker 
1664*61046927SAndroid Build Coastguard Worker } // namespace aco
1665