1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2016-2017 Broadcom
3*61046927SAndroid Build Coastguard Worker *
4*61046927SAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker *
11*61046927SAndroid Build Coastguard Worker * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker * Software.
14*61046927SAndroid Build Coastguard Worker *
15*61046927SAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20*61046927SAndroid Build Coastguard Worker * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21*61046927SAndroid Build Coastguard Worker * IN THE SOFTWARE.
22*61046927SAndroid Build Coastguard Worker */
23*61046927SAndroid Build Coastguard Worker
24*61046927SAndroid Build Coastguard Worker #include "broadcom/common/v3d_device_info.h"
25*61046927SAndroid Build Coastguard Worker #include "v3d_compiler.h"
26*61046927SAndroid Build Coastguard Worker #include "compiler/nir/nir_schedule.h"
27*61046927SAndroid Build Coastguard Worker #include "compiler/nir/nir_builder.h"
28*61046927SAndroid Build Coastguard Worker
29*61046927SAndroid Build Coastguard Worker int
vir_get_nsrc(struct qinst * inst)30*61046927SAndroid Build Coastguard Worker vir_get_nsrc(struct qinst *inst)
31*61046927SAndroid Build Coastguard Worker {
32*61046927SAndroid Build Coastguard Worker switch (inst->qpu.type) {
33*61046927SAndroid Build Coastguard Worker case V3D_QPU_INSTR_TYPE_BRANCH:
34*61046927SAndroid Build Coastguard Worker return 0;
35*61046927SAndroid Build Coastguard Worker case V3D_QPU_INSTR_TYPE_ALU:
36*61046927SAndroid Build Coastguard Worker if (inst->qpu.alu.add.op != V3D_QPU_A_NOP)
37*61046927SAndroid Build Coastguard Worker return v3d_qpu_add_op_num_src(inst->qpu.alu.add.op);
38*61046927SAndroid Build Coastguard Worker else
39*61046927SAndroid Build Coastguard Worker return v3d_qpu_mul_op_num_src(inst->qpu.alu.mul.op);
40*61046927SAndroid Build Coastguard Worker }
41*61046927SAndroid Build Coastguard Worker
42*61046927SAndroid Build Coastguard Worker return 0;
43*61046927SAndroid Build Coastguard Worker }
44*61046927SAndroid Build Coastguard Worker
45*61046927SAndroid Build Coastguard Worker /**
46*61046927SAndroid Build Coastguard Worker * Returns whether the instruction has any side effects that must be
47*61046927SAndroid Build Coastguard Worker * preserved.
48*61046927SAndroid Build Coastguard Worker */
49*61046927SAndroid Build Coastguard Worker bool
vir_has_side_effects(struct v3d_compile * c,struct qinst * inst)50*61046927SAndroid Build Coastguard Worker vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
51*61046927SAndroid Build Coastguard Worker {
52*61046927SAndroid Build Coastguard Worker switch (inst->qpu.type) {
53*61046927SAndroid Build Coastguard Worker case V3D_QPU_INSTR_TYPE_BRANCH:
54*61046927SAndroid Build Coastguard Worker return true;
55*61046927SAndroid Build Coastguard Worker case V3D_QPU_INSTR_TYPE_ALU:
56*61046927SAndroid Build Coastguard Worker switch (inst->qpu.alu.add.op) {
57*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_SETREVF:
58*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_SETMSF:
59*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_VPMSETUP:
60*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_STVPMV:
61*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_STVPMD:
62*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_STVPMP:
63*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_VPMWT:
64*61046927SAndroid Build Coastguard Worker case V3D_QPU_A_TMUWT:
65*61046927SAndroid Build Coastguard Worker return true;
66*61046927SAndroid Build Coastguard Worker default:
67*61046927SAndroid Build Coastguard Worker break;
68*61046927SAndroid Build Coastguard Worker }
69*61046927SAndroid Build Coastguard Worker
70*61046927SAndroid Build Coastguard Worker switch (inst->qpu.alu.mul.op) {
71*61046927SAndroid Build Coastguard Worker case V3D_QPU_M_MULTOP:
72*61046927SAndroid Build Coastguard Worker return true;
73*61046927SAndroid Build Coastguard Worker default:
74*61046927SAndroid Build Coastguard Worker break;
75*61046927SAndroid Build Coastguard Worker }
76*61046927SAndroid Build Coastguard Worker }
77*61046927SAndroid Build Coastguard Worker
78*61046927SAndroid Build Coastguard Worker if (inst->qpu.sig.ldtmu ||
79*61046927SAndroid Build Coastguard Worker inst->qpu.sig.ldvary ||
80*61046927SAndroid Build Coastguard Worker inst->qpu.sig.ldtlbu ||
81*61046927SAndroid Build Coastguard Worker inst->qpu.sig.ldtlb ||
82*61046927SAndroid Build Coastguard Worker inst->qpu.sig.wrtmuc ||
83*61046927SAndroid Build Coastguard Worker inst->qpu.sig.thrsw) {
84*61046927SAndroid Build Coastguard Worker return true;
85*61046927SAndroid Build Coastguard Worker }
86*61046927SAndroid Build Coastguard Worker
87*61046927SAndroid Build Coastguard Worker /* ldunifa works like ldunif: it reads an element and advances the
88*61046927SAndroid Build Coastguard Worker * pointer, so each read has a side effect (we don't care for ldunif
89*61046927SAndroid Build Coastguard Worker * because we reconstruct the uniform stream buffer after compiling
90*61046927SAndroid Build Coastguard Worker * with the surviving uniforms), so allowing DCE to remove
91*61046927SAndroid Build Coastguard Worker * one would break follow-up loads. We could fix this by emitting a
92*61046927SAndroid Build Coastguard Worker * unifa for each ldunifa, but each unifa requires 3 delay slots
93*61046927SAndroid Build Coastguard Worker * before a ldunifa, so that would be quite expensive.
94*61046927SAndroid Build Coastguard Worker */
95*61046927SAndroid Build Coastguard Worker if (inst->qpu.sig.ldunifa || inst->qpu.sig.ldunifarf)
96*61046927SAndroid Build Coastguard Worker return true;
97*61046927SAndroid Build Coastguard Worker
98*61046927SAndroid Build Coastguard Worker return false;
99*61046927SAndroid Build Coastguard Worker }
100*61046927SAndroid Build Coastguard Worker
101*61046927SAndroid Build Coastguard Worker bool
vir_is_raw_mov(struct qinst * inst)102*61046927SAndroid Build Coastguard Worker vir_is_raw_mov(struct qinst *inst)
103*61046927SAndroid Build Coastguard Worker {
104*61046927SAndroid Build Coastguard Worker if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
105*61046927SAndroid Build Coastguard Worker (inst->qpu.alu.mul.op != V3D_QPU_M_FMOV &&
106*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.op != V3D_QPU_M_MOV)) {
107*61046927SAndroid Build Coastguard Worker return false;
108*61046927SAndroid Build Coastguard Worker }
109*61046927SAndroid Build Coastguard Worker
110*61046927SAndroid Build Coastguard Worker if (inst->qpu.alu.add.output_pack != V3D_QPU_PACK_NONE ||
111*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE) {
112*61046927SAndroid Build Coastguard Worker return false;
113*61046927SAndroid Build Coastguard Worker }
114*61046927SAndroid Build Coastguard Worker
115*61046927SAndroid Build Coastguard Worker if (inst->qpu.alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
116*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.b.unpack != V3D_QPU_UNPACK_NONE ||
117*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
118*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.b.unpack != V3D_QPU_UNPACK_NONE) {
119*61046927SAndroid Build Coastguard Worker return false;
120*61046927SAndroid Build Coastguard Worker }
121*61046927SAndroid Build Coastguard Worker
122*61046927SAndroid Build Coastguard Worker if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
123*61046927SAndroid Build Coastguard Worker inst->qpu.flags.mc != V3D_QPU_COND_NONE)
124*61046927SAndroid Build Coastguard Worker return false;
125*61046927SAndroid Build Coastguard Worker
126*61046927SAndroid Build Coastguard Worker return true;
127*61046927SAndroid Build Coastguard Worker }
128*61046927SAndroid Build Coastguard Worker
129*61046927SAndroid Build Coastguard Worker bool
vir_is_add(struct qinst * inst)130*61046927SAndroid Build Coastguard Worker vir_is_add(struct qinst *inst)
131*61046927SAndroid Build Coastguard Worker {
132*61046927SAndroid Build Coastguard Worker return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
133*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.op != V3D_QPU_A_NOP);
134*61046927SAndroid Build Coastguard Worker }
135*61046927SAndroid Build Coastguard Worker
136*61046927SAndroid Build Coastguard Worker bool
vir_is_mul(struct qinst * inst)137*61046927SAndroid Build Coastguard Worker vir_is_mul(struct qinst *inst)
138*61046927SAndroid Build Coastguard Worker {
139*61046927SAndroid Build Coastguard Worker return (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
140*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.op != V3D_QPU_M_NOP);
141*61046927SAndroid Build Coastguard Worker }
142*61046927SAndroid Build Coastguard Worker
143*61046927SAndroid Build Coastguard Worker bool
vir_is_tex(const struct v3d_device_info * devinfo,struct qinst * inst)144*61046927SAndroid Build Coastguard Worker vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
145*61046927SAndroid Build Coastguard Worker {
146*61046927SAndroid Build Coastguard Worker if (inst->dst.file == QFILE_MAGIC)
147*61046927SAndroid Build Coastguard Worker return v3d_qpu_magic_waddr_is_tmu(devinfo, inst->dst.index);
148*61046927SAndroid Build Coastguard Worker
149*61046927SAndroid Build Coastguard Worker if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
150*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.op == V3D_QPU_A_TMUWT) {
151*61046927SAndroid Build Coastguard Worker return true;
152*61046927SAndroid Build Coastguard Worker }
153*61046927SAndroid Build Coastguard Worker
154*61046927SAndroid Build Coastguard Worker return false;
155*61046927SAndroid Build Coastguard Worker }
156*61046927SAndroid Build Coastguard Worker
157*61046927SAndroid Build Coastguard Worker bool
vir_writes_r4_implicitly(const struct v3d_device_info * devinfo,struct qinst * inst)158*61046927SAndroid Build Coastguard Worker vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
159*61046927SAndroid Build Coastguard Worker struct qinst *inst)
160*61046927SAndroid Build Coastguard Worker {
161*61046927SAndroid Build Coastguard Worker if (!devinfo->has_accumulators)
162*61046927SAndroid Build Coastguard Worker return false;
163*61046927SAndroid Build Coastguard Worker
164*61046927SAndroid Build Coastguard Worker switch (inst->dst.file) {
165*61046927SAndroid Build Coastguard Worker case QFILE_MAGIC:
166*61046927SAndroid Build Coastguard Worker switch (inst->dst.index) {
167*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_RECIP:
168*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_RSQRT:
169*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_EXP:
170*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_LOG:
171*61046927SAndroid Build Coastguard Worker case V3D_QPU_WADDR_SIN:
172*61046927SAndroid Build Coastguard Worker return true;
173*61046927SAndroid Build Coastguard Worker }
174*61046927SAndroid Build Coastguard Worker break;
175*61046927SAndroid Build Coastguard Worker default:
176*61046927SAndroid Build Coastguard Worker break;
177*61046927SAndroid Build Coastguard Worker }
178*61046927SAndroid Build Coastguard Worker
179*61046927SAndroid Build Coastguard Worker return false;
180*61046927SAndroid Build Coastguard Worker }
181*61046927SAndroid Build Coastguard Worker
182*61046927SAndroid Build Coastguard Worker void
vir_set_unpack(struct qinst * inst,int src,enum v3d_qpu_input_unpack unpack)183*61046927SAndroid Build Coastguard Worker vir_set_unpack(struct qinst *inst, int src,
184*61046927SAndroid Build Coastguard Worker enum v3d_qpu_input_unpack unpack)
185*61046927SAndroid Build Coastguard Worker {
186*61046927SAndroid Build Coastguard Worker assert(src == 0 || src == 1);
187*61046927SAndroid Build Coastguard Worker
188*61046927SAndroid Build Coastguard Worker if (vir_is_add(inst)) {
189*61046927SAndroid Build Coastguard Worker if (src == 0)
190*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.a.unpack = unpack;
191*61046927SAndroid Build Coastguard Worker else
192*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.b.unpack = unpack;
193*61046927SAndroid Build Coastguard Worker } else {
194*61046927SAndroid Build Coastguard Worker assert(vir_is_mul(inst));
195*61046927SAndroid Build Coastguard Worker if (src == 0)
196*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.a.unpack = unpack;
197*61046927SAndroid Build Coastguard Worker else
198*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.b.unpack = unpack;
199*61046927SAndroid Build Coastguard Worker }
200*61046927SAndroid Build Coastguard Worker }
201*61046927SAndroid Build Coastguard Worker
202*61046927SAndroid Build Coastguard Worker void
vir_set_pack(struct qinst * inst,enum v3d_qpu_output_pack pack)203*61046927SAndroid Build Coastguard Worker vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack)
204*61046927SAndroid Build Coastguard Worker {
205*61046927SAndroid Build Coastguard Worker if (vir_is_add(inst)) {
206*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.output_pack = pack;
207*61046927SAndroid Build Coastguard Worker } else {
208*61046927SAndroid Build Coastguard Worker assert(vir_is_mul(inst));
209*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.output_pack = pack;
210*61046927SAndroid Build Coastguard Worker }
211*61046927SAndroid Build Coastguard Worker }
212*61046927SAndroid Build Coastguard Worker
213*61046927SAndroid Build Coastguard Worker void
vir_set_cond(struct qinst * inst,enum v3d_qpu_cond cond)214*61046927SAndroid Build Coastguard Worker vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
215*61046927SAndroid Build Coastguard Worker {
216*61046927SAndroid Build Coastguard Worker if (vir_is_add(inst)) {
217*61046927SAndroid Build Coastguard Worker inst->qpu.flags.ac = cond;
218*61046927SAndroid Build Coastguard Worker } else {
219*61046927SAndroid Build Coastguard Worker assert(vir_is_mul(inst));
220*61046927SAndroid Build Coastguard Worker inst->qpu.flags.mc = cond;
221*61046927SAndroid Build Coastguard Worker }
222*61046927SAndroid Build Coastguard Worker }
223*61046927SAndroid Build Coastguard Worker
224*61046927SAndroid Build Coastguard Worker enum v3d_qpu_cond
vir_get_cond(struct qinst * inst)225*61046927SAndroid Build Coastguard Worker vir_get_cond(struct qinst *inst)
226*61046927SAndroid Build Coastguard Worker {
227*61046927SAndroid Build Coastguard Worker assert(inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU);
228*61046927SAndroid Build Coastguard Worker
229*61046927SAndroid Build Coastguard Worker if (vir_is_add(inst))
230*61046927SAndroid Build Coastguard Worker return inst->qpu.flags.ac;
231*61046927SAndroid Build Coastguard Worker else if (vir_is_mul(inst))
232*61046927SAndroid Build Coastguard Worker return inst->qpu.flags.mc;
233*61046927SAndroid Build Coastguard Worker else /* NOP */
234*61046927SAndroid Build Coastguard Worker return V3D_QPU_COND_NONE;
235*61046927SAndroid Build Coastguard Worker }
236*61046927SAndroid Build Coastguard Worker
237*61046927SAndroid Build Coastguard Worker void
vir_set_pf(struct v3d_compile * c,struct qinst * inst,enum v3d_qpu_pf pf)238*61046927SAndroid Build Coastguard Worker vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf)
239*61046927SAndroid Build Coastguard Worker {
240*61046927SAndroid Build Coastguard Worker c->flags_temp = -1;
241*61046927SAndroid Build Coastguard Worker if (vir_is_add(inst)) {
242*61046927SAndroid Build Coastguard Worker inst->qpu.flags.apf = pf;
243*61046927SAndroid Build Coastguard Worker } else {
244*61046927SAndroid Build Coastguard Worker assert(vir_is_mul(inst));
245*61046927SAndroid Build Coastguard Worker inst->qpu.flags.mpf = pf;
246*61046927SAndroid Build Coastguard Worker }
247*61046927SAndroid Build Coastguard Worker }
248*61046927SAndroid Build Coastguard Worker
249*61046927SAndroid Build Coastguard Worker void
vir_set_uf(struct v3d_compile * c,struct qinst * inst,enum v3d_qpu_uf uf)250*61046927SAndroid Build Coastguard Worker vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_uf uf)
251*61046927SAndroid Build Coastguard Worker {
252*61046927SAndroid Build Coastguard Worker c->flags_temp = -1;
253*61046927SAndroid Build Coastguard Worker if (vir_is_add(inst)) {
254*61046927SAndroid Build Coastguard Worker inst->qpu.flags.auf = uf;
255*61046927SAndroid Build Coastguard Worker } else {
256*61046927SAndroid Build Coastguard Worker assert(vir_is_mul(inst));
257*61046927SAndroid Build Coastguard Worker inst->qpu.flags.muf = uf;
258*61046927SAndroid Build Coastguard Worker }
259*61046927SAndroid Build Coastguard Worker }
260*61046927SAndroid Build Coastguard Worker
261*61046927SAndroid Build Coastguard Worker #if 0
262*61046927SAndroid Build Coastguard Worker uint8_t
263*61046927SAndroid Build Coastguard Worker vir_channels_written(struct qinst *inst)
264*61046927SAndroid Build Coastguard Worker {
265*61046927SAndroid Build Coastguard Worker if (vir_is_mul(inst)) {
266*61046927SAndroid Build Coastguard Worker switch (inst->dst.pack) {
267*61046927SAndroid Build Coastguard Worker case QPU_PACK_MUL_NOP:
268*61046927SAndroid Build Coastguard Worker case QPU_PACK_MUL_8888:
269*61046927SAndroid Build Coastguard Worker return 0xf;
270*61046927SAndroid Build Coastguard Worker case QPU_PACK_MUL_8A:
271*61046927SAndroid Build Coastguard Worker return 0x1;
272*61046927SAndroid Build Coastguard Worker case QPU_PACK_MUL_8B:
273*61046927SAndroid Build Coastguard Worker return 0x2;
274*61046927SAndroid Build Coastguard Worker case QPU_PACK_MUL_8C:
275*61046927SAndroid Build Coastguard Worker return 0x4;
276*61046927SAndroid Build Coastguard Worker case QPU_PACK_MUL_8D:
277*61046927SAndroid Build Coastguard Worker return 0x8;
278*61046927SAndroid Build Coastguard Worker }
279*61046927SAndroid Build Coastguard Worker } else {
280*61046927SAndroid Build Coastguard Worker switch (inst->dst.pack) {
281*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_NOP:
282*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8888:
283*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8888_SAT:
284*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_32_SAT:
285*61046927SAndroid Build Coastguard Worker return 0xf;
286*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8A:
287*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8A_SAT:
288*61046927SAndroid Build Coastguard Worker return 0x1;
289*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8B:
290*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8B_SAT:
291*61046927SAndroid Build Coastguard Worker return 0x2;
292*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8C:
293*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8C_SAT:
294*61046927SAndroid Build Coastguard Worker return 0x4;
295*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8D:
296*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_8D_SAT:
297*61046927SAndroid Build Coastguard Worker return 0x8;
298*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_16A:
299*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_16A_SAT:
300*61046927SAndroid Build Coastguard Worker return 0x3;
301*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_16B:
302*61046927SAndroid Build Coastguard Worker case QPU_PACK_A_16B_SAT:
303*61046927SAndroid Build Coastguard Worker return 0xc;
304*61046927SAndroid Build Coastguard Worker }
305*61046927SAndroid Build Coastguard Worker }
306*61046927SAndroid Build Coastguard Worker unreachable("Bad pack field");
307*61046927SAndroid Build Coastguard Worker }
308*61046927SAndroid Build Coastguard Worker #endif
309*61046927SAndroid Build Coastguard Worker
310*61046927SAndroid Build Coastguard Worker struct qreg
vir_get_temp(struct v3d_compile * c)311*61046927SAndroid Build Coastguard Worker vir_get_temp(struct v3d_compile *c)
312*61046927SAndroid Build Coastguard Worker {
313*61046927SAndroid Build Coastguard Worker struct qreg reg;
314*61046927SAndroid Build Coastguard Worker
315*61046927SAndroid Build Coastguard Worker reg.file = QFILE_TEMP;
316*61046927SAndroid Build Coastguard Worker reg.index = c->num_temps++;
317*61046927SAndroid Build Coastguard Worker
318*61046927SAndroid Build Coastguard Worker if (c->num_temps > c->defs_array_size) {
319*61046927SAndroid Build Coastguard Worker uint32_t old_size = c->defs_array_size;
320*61046927SAndroid Build Coastguard Worker c->defs_array_size = MAX2(old_size * 2, 16);
321*61046927SAndroid Build Coastguard Worker
322*61046927SAndroid Build Coastguard Worker c->defs = reralloc(c, c->defs, struct qinst *,
323*61046927SAndroid Build Coastguard Worker c->defs_array_size);
324*61046927SAndroid Build Coastguard Worker memset(&c->defs[old_size], 0,
325*61046927SAndroid Build Coastguard Worker sizeof(c->defs[0]) * (c->defs_array_size - old_size));
326*61046927SAndroid Build Coastguard Worker
327*61046927SAndroid Build Coastguard Worker c->spillable = reralloc(c, c->spillable,
328*61046927SAndroid Build Coastguard Worker BITSET_WORD,
329*61046927SAndroid Build Coastguard Worker BITSET_WORDS(c->defs_array_size));
330*61046927SAndroid Build Coastguard Worker for (int i = old_size; i < c->defs_array_size; i++)
331*61046927SAndroid Build Coastguard Worker BITSET_SET(c->spillable, i);
332*61046927SAndroid Build Coastguard Worker }
333*61046927SAndroid Build Coastguard Worker
334*61046927SAndroid Build Coastguard Worker return reg;
335*61046927SAndroid Build Coastguard Worker }
336*61046927SAndroid Build Coastguard Worker
337*61046927SAndroid Build Coastguard Worker struct qinst *
vir_add_inst(enum v3d_qpu_add_op op,struct qreg dst,struct qreg src0,struct qreg src1)338*61046927SAndroid Build Coastguard Worker vir_add_inst(enum v3d_qpu_add_op op, struct qreg dst, struct qreg src0, struct qreg src1)
339*61046927SAndroid Build Coastguard Worker {
340*61046927SAndroid Build Coastguard Worker struct qinst *inst = calloc(1, sizeof(*inst));
341*61046927SAndroid Build Coastguard Worker
342*61046927SAndroid Build Coastguard Worker inst->qpu = v3d_qpu_nop();
343*61046927SAndroid Build Coastguard Worker inst->qpu.alu.add.op = op;
344*61046927SAndroid Build Coastguard Worker
345*61046927SAndroid Build Coastguard Worker inst->dst = dst;
346*61046927SAndroid Build Coastguard Worker inst->src[0] = src0;
347*61046927SAndroid Build Coastguard Worker inst->src[1] = src1;
348*61046927SAndroid Build Coastguard Worker inst->uniform = ~0;
349*61046927SAndroid Build Coastguard Worker
350*61046927SAndroid Build Coastguard Worker inst->ip = -1;
351*61046927SAndroid Build Coastguard Worker
352*61046927SAndroid Build Coastguard Worker return inst;
353*61046927SAndroid Build Coastguard Worker }
354*61046927SAndroid Build Coastguard Worker
355*61046927SAndroid Build Coastguard Worker struct qinst *
vir_mul_inst(enum v3d_qpu_mul_op op,struct qreg dst,struct qreg src0,struct qreg src1)356*61046927SAndroid Build Coastguard Worker vir_mul_inst(enum v3d_qpu_mul_op op, struct qreg dst, struct qreg src0, struct qreg src1)
357*61046927SAndroid Build Coastguard Worker {
358*61046927SAndroid Build Coastguard Worker struct qinst *inst = calloc(1, sizeof(*inst));
359*61046927SAndroid Build Coastguard Worker
360*61046927SAndroid Build Coastguard Worker inst->qpu = v3d_qpu_nop();
361*61046927SAndroid Build Coastguard Worker inst->qpu.alu.mul.op = op;
362*61046927SAndroid Build Coastguard Worker
363*61046927SAndroid Build Coastguard Worker inst->dst = dst;
364*61046927SAndroid Build Coastguard Worker inst->src[0] = src0;
365*61046927SAndroid Build Coastguard Worker inst->src[1] = src1;
366*61046927SAndroid Build Coastguard Worker inst->uniform = ~0;
367*61046927SAndroid Build Coastguard Worker
368*61046927SAndroid Build Coastguard Worker inst->ip = -1;
369*61046927SAndroid Build Coastguard Worker
370*61046927SAndroid Build Coastguard Worker return inst;
371*61046927SAndroid Build Coastguard Worker }
372*61046927SAndroid Build Coastguard Worker
373*61046927SAndroid Build Coastguard Worker struct qinst *
vir_branch_inst(struct v3d_compile * c,enum v3d_qpu_branch_cond cond)374*61046927SAndroid Build Coastguard Worker vir_branch_inst(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
375*61046927SAndroid Build Coastguard Worker {
376*61046927SAndroid Build Coastguard Worker struct qinst *inst = calloc(1, sizeof(*inst));
377*61046927SAndroid Build Coastguard Worker
378*61046927SAndroid Build Coastguard Worker inst->qpu = v3d_qpu_nop();
379*61046927SAndroid Build Coastguard Worker inst->qpu.type = V3D_QPU_INSTR_TYPE_BRANCH;
380*61046927SAndroid Build Coastguard Worker inst->qpu.branch.cond = cond;
381*61046927SAndroid Build Coastguard Worker inst->qpu.branch.msfign = V3D_QPU_MSFIGN_NONE;
382*61046927SAndroid Build Coastguard Worker inst->qpu.branch.bdi = V3D_QPU_BRANCH_DEST_REL;
383*61046927SAndroid Build Coastguard Worker inst->qpu.branch.ub = true;
384*61046927SAndroid Build Coastguard Worker inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;
385*61046927SAndroid Build Coastguard Worker
386*61046927SAndroid Build Coastguard Worker inst->dst = vir_nop_reg();
387*61046927SAndroid Build Coastguard Worker inst->uniform = vir_get_uniform_index(c, QUNIFORM_CONSTANT, 0);
388*61046927SAndroid Build Coastguard Worker
389*61046927SAndroid Build Coastguard Worker inst->ip = -1;
390*61046927SAndroid Build Coastguard Worker
391*61046927SAndroid Build Coastguard Worker return inst;
392*61046927SAndroid Build Coastguard Worker }
393*61046927SAndroid Build Coastguard Worker
394*61046927SAndroid Build Coastguard Worker static void
vir_emit(struct v3d_compile * c,struct qinst * inst)395*61046927SAndroid Build Coastguard Worker vir_emit(struct v3d_compile *c, struct qinst *inst)
396*61046927SAndroid Build Coastguard Worker {
397*61046927SAndroid Build Coastguard Worker inst->ip = -1;
398*61046927SAndroid Build Coastguard Worker
399*61046927SAndroid Build Coastguard Worker switch (c->cursor.mode) {
400*61046927SAndroid Build Coastguard Worker case vir_cursor_add:
401*61046927SAndroid Build Coastguard Worker list_add(&inst->link, c->cursor.link);
402*61046927SAndroid Build Coastguard Worker break;
403*61046927SAndroid Build Coastguard Worker case vir_cursor_addtail:
404*61046927SAndroid Build Coastguard Worker list_addtail(&inst->link, c->cursor.link);
405*61046927SAndroid Build Coastguard Worker break;
406*61046927SAndroid Build Coastguard Worker }
407*61046927SAndroid Build Coastguard Worker
408*61046927SAndroid Build Coastguard Worker c->cursor = vir_after_inst(inst);
409*61046927SAndroid Build Coastguard Worker c->live_intervals_valid = false;
410*61046927SAndroid Build Coastguard Worker }
411*61046927SAndroid Build Coastguard Worker
412*61046927SAndroid Build Coastguard Worker /* Updates inst to write to a new temporary, emits it, and notes the def. */
413*61046927SAndroid Build Coastguard Worker struct qreg
vir_emit_def(struct v3d_compile * c,struct qinst * inst)414*61046927SAndroid Build Coastguard Worker vir_emit_def(struct v3d_compile *c, struct qinst *inst)
415*61046927SAndroid Build Coastguard Worker {
416*61046927SAndroid Build Coastguard Worker assert(inst->dst.file == QFILE_NULL);
417*61046927SAndroid Build Coastguard Worker
418*61046927SAndroid Build Coastguard Worker /* If we're emitting an instruction that's a def, it had better be
419*61046927SAndroid Build Coastguard Worker * writing a register.
420*61046927SAndroid Build Coastguard Worker */
421*61046927SAndroid Build Coastguard Worker if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
422*61046927SAndroid Build Coastguard Worker assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
423*61046927SAndroid Build Coastguard Worker v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
424*61046927SAndroid Build Coastguard Worker assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
425*61046927SAndroid Build Coastguard Worker v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
426*61046927SAndroid Build Coastguard Worker }
427*61046927SAndroid Build Coastguard Worker
428*61046927SAndroid Build Coastguard Worker inst->dst = vir_get_temp(c);
429*61046927SAndroid Build Coastguard Worker
430*61046927SAndroid Build Coastguard Worker if (inst->dst.file == QFILE_TEMP)
431*61046927SAndroid Build Coastguard Worker c->defs[inst->dst.index] = inst;
432*61046927SAndroid Build Coastguard Worker
433*61046927SAndroid Build Coastguard Worker vir_emit(c, inst);
434*61046927SAndroid Build Coastguard Worker
435*61046927SAndroid Build Coastguard Worker return inst->dst;
436*61046927SAndroid Build Coastguard Worker }
437*61046927SAndroid Build Coastguard Worker
438*61046927SAndroid Build Coastguard Worker struct qinst *
vir_emit_nondef(struct v3d_compile * c,struct qinst * inst)439*61046927SAndroid Build Coastguard Worker vir_emit_nondef(struct v3d_compile *c, struct qinst *inst)
440*61046927SAndroid Build Coastguard Worker {
441*61046927SAndroid Build Coastguard Worker if (inst->dst.file == QFILE_TEMP)
442*61046927SAndroid Build Coastguard Worker c->defs[inst->dst.index] = NULL;
443*61046927SAndroid Build Coastguard Worker
444*61046927SAndroid Build Coastguard Worker vir_emit(c, inst);
445*61046927SAndroid Build Coastguard Worker
446*61046927SAndroid Build Coastguard Worker return inst;
447*61046927SAndroid Build Coastguard Worker }
448*61046927SAndroid Build Coastguard Worker
449*61046927SAndroid Build Coastguard Worker struct qblock *
vir_new_block(struct v3d_compile * c)450*61046927SAndroid Build Coastguard Worker vir_new_block(struct v3d_compile *c)
451*61046927SAndroid Build Coastguard Worker {
452*61046927SAndroid Build Coastguard Worker struct qblock *block = rzalloc(c, struct qblock);
453*61046927SAndroid Build Coastguard Worker
454*61046927SAndroid Build Coastguard Worker list_inithead(&block->instructions);
455*61046927SAndroid Build Coastguard Worker
456*61046927SAndroid Build Coastguard Worker block->predecessors = _mesa_set_create(block,
457*61046927SAndroid Build Coastguard Worker _mesa_hash_pointer,
458*61046927SAndroid Build Coastguard Worker _mesa_key_pointer_equal);
459*61046927SAndroid Build Coastguard Worker
460*61046927SAndroid Build Coastguard Worker block->index = c->next_block_index++;
461*61046927SAndroid Build Coastguard Worker
462*61046927SAndroid Build Coastguard Worker return block;
463*61046927SAndroid Build Coastguard Worker }
464*61046927SAndroid Build Coastguard Worker
465*61046927SAndroid Build Coastguard Worker void
vir_set_emit_block(struct v3d_compile * c,struct qblock * block)466*61046927SAndroid Build Coastguard Worker vir_set_emit_block(struct v3d_compile *c, struct qblock *block)
467*61046927SAndroid Build Coastguard Worker {
468*61046927SAndroid Build Coastguard Worker c->cur_block = block;
469*61046927SAndroid Build Coastguard Worker c->cursor = vir_after_block(block);
470*61046927SAndroid Build Coastguard Worker list_addtail(&block->link, &c->blocks);
471*61046927SAndroid Build Coastguard Worker }
472*61046927SAndroid Build Coastguard Worker
473*61046927SAndroid Build Coastguard Worker struct qblock *
vir_entry_block(struct v3d_compile * c)474*61046927SAndroid Build Coastguard Worker vir_entry_block(struct v3d_compile *c)
475*61046927SAndroid Build Coastguard Worker {
476*61046927SAndroid Build Coastguard Worker return list_first_entry(&c->blocks, struct qblock, link);
477*61046927SAndroid Build Coastguard Worker }
478*61046927SAndroid Build Coastguard Worker
479*61046927SAndroid Build Coastguard Worker struct qblock *
vir_exit_block(struct v3d_compile * c)480*61046927SAndroid Build Coastguard Worker vir_exit_block(struct v3d_compile *c)
481*61046927SAndroid Build Coastguard Worker {
482*61046927SAndroid Build Coastguard Worker return list_last_entry(&c->blocks, struct qblock, link);
483*61046927SAndroid Build Coastguard Worker }
484*61046927SAndroid Build Coastguard Worker
485*61046927SAndroid Build Coastguard Worker void
vir_link_blocks(struct qblock * predecessor,struct qblock * successor)486*61046927SAndroid Build Coastguard Worker vir_link_blocks(struct qblock *predecessor, struct qblock *successor)
487*61046927SAndroid Build Coastguard Worker {
488*61046927SAndroid Build Coastguard Worker _mesa_set_add(successor->predecessors, predecessor);
489*61046927SAndroid Build Coastguard Worker if (predecessor->successors[0]) {
490*61046927SAndroid Build Coastguard Worker assert(!predecessor->successors[1]);
491*61046927SAndroid Build Coastguard Worker predecessor->successors[1] = successor;
492*61046927SAndroid Build Coastguard Worker } else {
493*61046927SAndroid Build Coastguard Worker predecessor->successors[0] = successor;
494*61046927SAndroid Build Coastguard Worker }
495*61046927SAndroid Build Coastguard Worker }
496*61046927SAndroid Build Coastguard Worker
497*61046927SAndroid Build Coastguard Worker const struct v3d_compiler *
v3d_compiler_init(const struct v3d_device_info * devinfo,uint32_t max_inline_uniform_buffers)498*61046927SAndroid Build Coastguard Worker v3d_compiler_init(const struct v3d_device_info *devinfo,
499*61046927SAndroid Build Coastguard Worker uint32_t max_inline_uniform_buffers)
500*61046927SAndroid Build Coastguard Worker {
501*61046927SAndroid Build Coastguard Worker struct v3d_compiler *compiler = rzalloc(NULL, struct v3d_compiler);
502*61046927SAndroid Build Coastguard Worker if (!compiler)
503*61046927SAndroid Build Coastguard Worker return NULL;
504*61046927SAndroid Build Coastguard Worker
505*61046927SAndroid Build Coastguard Worker compiler->devinfo = devinfo;
506*61046927SAndroid Build Coastguard Worker compiler->max_inline_uniform_buffers = max_inline_uniform_buffers;
507*61046927SAndroid Build Coastguard Worker
508*61046927SAndroid Build Coastguard Worker if (!vir_init_reg_sets(compiler)) {
509*61046927SAndroid Build Coastguard Worker ralloc_free(compiler);
510*61046927SAndroid Build Coastguard Worker return NULL;
511*61046927SAndroid Build Coastguard Worker }
512*61046927SAndroid Build Coastguard Worker
513*61046927SAndroid Build Coastguard Worker return compiler;
514*61046927SAndroid Build Coastguard Worker }
515*61046927SAndroid Build Coastguard Worker
516*61046927SAndroid Build Coastguard Worker void
v3d_compiler_free(const struct v3d_compiler * compiler)517*61046927SAndroid Build Coastguard Worker v3d_compiler_free(const struct v3d_compiler *compiler)
518*61046927SAndroid Build Coastguard Worker {
519*61046927SAndroid Build Coastguard Worker ralloc_free((void *)compiler);
520*61046927SAndroid Build Coastguard Worker }
521*61046927SAndroid Build Coastguard Worker
522*61046927SAndroid Build Coastguard Worker struct v3d_compiler_strategy {
523*61046927SAndroid Build Coastguard Worker const char *name;
524*61046927SAndroid Build Coastguard Worker uint32_t max_threads;
525*61046927SAndroid Build Coastguard Worker uint32_t min_threads;
526*61046927SAndroid Build Coastguard Worker bool disable_general_tmu_sched;
527*61046927SAndroid Build Coastguard Worker bool disable_gcm;
528*61046927SAndroid Build Coastguard Worker bool disable_loop_unrolling;
529*61046927SAndroid Build Coastguard Worker bool disable_ubo_load_sorting;
530*61046927SAndroid Build Coastguard Worker bool move_buffer_loads;
531*61046927SAndroid Build Coastguard Worker bool disable_tmu_pipelining;
532*61046927SAndroid Build Coastguard Worker uint32_t max_tmu_spills;
533*61046927SAndroid Build Coastguard Worker };
534*61046927SAndroid Build Coastguard Worker
535*61046927SAndroid Build Coastguard Worker static struct v3d_compile *
vir_compile_init(const struct v3d_compiler * compiler,struct v3d_key * key,nir_shader * s,void (* debug_output)(const char * msg,void * debug_output_data),void * debug_output_data,int program_id,int variant_id,uint32_t compile_strategy_idx,const struct v3d_compiler_strategy * strategy,bool fallback_scheduler)536*61046927SAndroid Build Coastguard Worker vir_compile_init(const struct v3d_compiler *compiler,
537*61046927SAndroid Build Coastguard Worker struct v3d_key *key,
538*61046927SAndroid Build Coastguard Worker nir_shader *s,
539*61046927SAndroid Build Coastguard Worker void (*debug_output)(const char *msg,
540*61046927SAndroid Build Coastguard Worker void *debug_output_data),
541*61046927SAndroid Build Coastguard Worker void *debug_output_data,
542*61046927SAndroid Build Coastguard Worker int program_id, int variant_id,
543*61046927SAndroid Build Coastguard Worker uint32_t compile_strategy_idx,
544*61046927SAndroid Build Coastguard Worker const struct v3d_compiler_strategy *strategy,
545*61046927SAndroid Build Coastguard Worker bool fallback_scheduler)
546*61046927SAndroid Build Coastguard Worker {
547*61046927SAndroid Build Coastguard Worker struct v3d_compile *c = rzalloc(NULL, struct v3d_compile);
548*61046927SAndroid Build Coastguard Worker
549*61046927SAndroid Build Coastguard Worker c->compiler = compiler;
550*61046927SAndroid Build Coastguard Worker c->devinfo = compiler->devinfo;
551*61046927SAndroid Build Coastguard Worker c->key = key;
552*61046927SAndroid Build Coastguard Worker c->program_id = program_id;
553*61046927SAndroid Build Coastguard Worker c->variant_id = variant_id;
554*61046927SAndroid Build Coastguard Worker c->compile_strategy_idx = compile_strategy_idx;
555*61046927SAndroid Build Coastguard Worker c->threads = strategy->max_threads;
556*61046927SAndroid Build Coastguard Worker c->debug_output = debug_output;
557*61046927SAndroid Build Coastguard Worker c->debug_output_data = debug_output_data;
558*61046927SAndroid Build Coastguard Worker c->compilation_result = V3D_COMPILATION_SUCCEEDED;
559*61046927SAndroid Build Coastguard Worker c->min_threads_for_reg_alloc = strategy->min_threads;
560*61046927SAndroid Build Coastguard Worker c->max_tmu_spills = strategy->max_tmu_spills;
561*61046927SAndroid Build Coastguard Worker c->fallback_scheduler = fallback_scheduler;
562*61046927SAndroid Build Coastguard Worker c->disable_general_tmu_sched = strategy->disable_general_tmu_sched;
563*61046927SAndroid Build Coastguard Worker c->disable_tmu_pipelining = strategy->disable_tmu_pipelining;
564*61046927SAndroid Build Coastguard Worker c->disable_constant_ubo_load_sorting = strategy->disable_ubo_load_sorting;
565*61046927SAndroid Build Coastguard Worker c->move_buffer_loads = strategy->move_buffer_loads;
566*61046927SAndroid Build Coastguard Worker c->disable_gcm = strategy->disable_gcm;
567*61046927SAndroid Build Coastguard Worker c->disable_loop_unrolling = V3D_DBG(NO_LOOP_UNROLL)
568*61046927SAndroid Build Coastguard Worker ? true : strategy->disable_loop_unrolling;
569*61046927SAndroid Build Coastguard Worker
570*61046927SAndroid Build Coastguard Worker
571*61046927SAndroid Build Coastguard Worker s = nir_shader_clone(c, s);
572*61046927SAndroid Build Coastguard Worker c->s = s;
573*61046927SAndroid Build Coastguard Worker
574*61046927SAndroid Build Coastguard Worker list_inithead(&c->blocks);
575*61046927SAndroid Build Coastguard Worker vir_set_emit_block(c, vir_new_block(c));
576*61046927SAndroid Build Coastguard Worker
577*61046927SAndroid Build Coastguard Worker c->output_position_index = -1;
578*61046927SAndroid Build Coastguard Worker c->output_sample_mask_index = -1;
579*61046927SAndroid Build Coastguard Worker
580*61046927SAndroid Build Coastguard Worker c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
581*61046927SAndroid Build Coastguard Worker _mesa_key_pointer_equal);
582*61046927SAndroid Build Coastguard Worker
583*61046927SAndroid Build Coastguard Worker c->tmu.outstanding_regs = _mesa_pointer_set_create(c);
584*61046927SAndroid Build Coastguard Worker c->flags_temp = -1;
585*61046927SAndroid Build Coastguard Worker
586*61046927SAndroid Build Coastguard Worker return c;
587*61046927SAndroid Build Coastguard Worker }
588*61046927SAndroid Build Coastguard Worker
589*61046927SAndroid Build Coastguard Worker static int
type_size_vec4(const struct glsl_type * type,bool bindless)590*61046927SAndroid Build Coastguard Worker type_size_vec4(const struct glsl_type *type, bool bindless)
591*61046927SAndroid Build Coastguard Worker {
592*61046927SAndroid Build Coastguard Worker return glsl_count_attribute_slots(type, false);
593*61046927SAndroid Build Coastguard Worker }
594*61046927SAndroid Build Coastguard Worker
595*61046927SAndroid Build Coastguard Worker static enum nir_lower_tex_packing
lower_tex_packing_cb(const nir_tex_instr * tex,const void * data)596*61046927SAndroid Build Coastguard Worker lower_tex_packing_cb(const nir_tex_instr *tex, const void *data)
597*61046927SAndroid Build Coastguard Worker {
598*61046927SAndroid Build Coastguard Worker struct v3d_compile *c = (struct v3d_compile *) data;
599*61046927SAndroid Build Coastguard Worker
600*61046927SAndroid Build Coastguard Worker int sampler_index = nir_tex_instr_need_sampler(tex) ?
601*61046927SAndroid Build Coastguard Worker tex->sampler_index : tex->backend_flags;
602*61046927SAndroid Build Coastguard Worker
603*61046927SAndroid Build Coastguard Worker assert(sampler_index < c->key->num_samplers_used);
604*61046927SAndroid Build Coastguard Worker return c->key->sampler[sampler_index].return_size == 16 ?
605*61046927SAndroid Build Coastguard Worker nir_lower_tex_packing_16 : nir_lower_tex_packing_none;
606*61046927SAndroid Build Coastguard Worker }
607*61046927SAndroid Build Coastguard Worker
608*61046927SAndroid Build Coastguard Worker static bool
v3d_nir_lower_null_pointers_cb(nir_builder * b,nir_intrinsic_instr * intr,void * _state)609*61046927SAndroid Build Coastguard Worker v3d_nir_lower_null_pointers_cb(nir_builder *b,
610*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr,
611*61046927SAndroid Build Coastguard Worker void *_state)
612*61046927SAndroid Build Coastguard Worker {
613*61046927SAndroid Build Coastguard Worker uint32_t buffer_src_idx;
614*61046927SAndroid Build Coastguard Worker
615*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
616*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ubo:
617*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ssbo:
618*61046927SAndroid Build Coastguard Worker buffer_src_idx = 0;
619*61046927SAndroid Build Coastguard Worker break;
620*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_ssbo:
621*61046927SAndroid Build Coastguard Worker buffer_src_idx = 1;
622*61046927SAndroid Build Coastguard Worker break;
623*61046927SAndroid Build Coastguard Worker default:
624*61046927SAndroid Build Coastguard Worker return false;
625*61046927SAndroid Build Coastguard Worker }
626*61046927SAndroid Build Coastguard Worker
627*61046927SAndroid Build Coastguard Worker /* If index if constant we are good */
628*61046927SAndroid Build Coastguard Worker nir_src *src = &intr->src[buffer_src_idx];
629*61046927SAndroid Build Coastguard Worker if (nir_src_is_const(*src))
630*61046927SAndroid Build Coastguard Worker return false;
631*61046927SAndroid Build Coastguard Worker
632*61046927SAndroid Build Coastguard Worker /* Otherwise, see if it comes from a bcsel including a null pointer */
633*61046927SAndroid Build Coastguard Worker if (src->ssa->parent_instr->type != nir_instr_type_alu)
634*61046927SAndroid Build Coastguard Worker return false;
635*61046927SAndroid Build Coastguard Worker
636*61046927SAndroid Build Coastguard Worker nir_alu_instr *alu = nir_instr_as_alu(src->ssa->parent_instr);
637*61046927SAndroid Build Coastguard Worker if (alu->op != nir_op_bcsel)
638*61046927SAndroid Build Coastguard Worker return false;
639*61046927SAndroid Build Coastguard Worker
640*61046927SAndroid Build Coastguard Worker /* A null pointer is specified using block index 0xffffffff */
641*61046927SAndroid Build Coastguard Worker int32_t null_src_idx = -1;
642*61046927SAndroid Build Coastguard Worker for (int i = 1; i < 3; i++) {
643*61046927SAndroid Build Coastguard Worker /* FIXME: since we are running this before optimization maybe
644*61046927SAndroid Build Coastguard Worker * we need to also handle the case where we may have bcsel
645*61046927SAndroid Build Coastguard Worker * chain that we need to recurse?
646*61046927SAndroid Build Coastguard Worker */
647*61046927SAndroid Build Coastguard Worker if (!nir_src_is_const(alu->src[i].src))
648*61046927SAndroid Build Coastguard Worker continue;
649*61046927SAndroid Build Coastguard Worker if (nir_src_comp_as_uint(alu->src[i].src, 0) != 0xffffffff)
650*61046927SAndroid Build Coastguard Worker continue;
651*61046927SAndroid Build Coastguard Worker
652*61046927SAndroid Build Coastguard Worker /* One of the bcsel srcs is a null pointer reference */
653*61046927SAndroid Build Coastguard Worker null_src_idx = i;
654*61046927SAndroid Build Coastguard Worker break;
655*61046927SAndroid Build Coastguard Worker }
656*61046927SAndroid Build Coastguard Worker
657*61046927SAndroid Build Coastguard Worker if (null_src_idx < 0)
658*61046927SAndroid Build Coastguard Worker return false;
659*61046927SAndroid Build Coastguard Worker
660*61046927SAndroid Build Coastguard Worker assert(null_src_idx == 1 || null_src_idx == 2);
661*61046927SAndroid Build Coastguard Worker int32_t copy_src_idx = null_src_idx == 1 ? 2 : 1;
662*61046927SAndroid Build Coastguard Worker
663*61046927SAndroid Build Coastguard Worker /* Rewrite the null pointer reference so we use the same buffer index
664*61046927SAndroid Build Coastguard Worker * as the other bcsel branch. This will allow optimization to remove
665*61046927SAndroid Build Coastguard Worker * the bcsel and we should then end up with a constant buffer index
666*61046927SAndroid Build Coastguard Worker * like we need.
667*61046927SAndroid Build Coastguard Worker */
668*61046927SAndroid Build Coastguard Worker b->cursor = nir_before_instr(&alu->instr);
669*61046927SAndroid Build Coastguard Worker nir_def *copy = nir_mov(b, alu->src[copy_src_idx].src.ssa);
670*61046927SAndroid Build Coastguard Worker nir_src_rewrite(&alu->src[null_src_idx].src, copy);
671*61046927SAndroid Build Coastguard Worker
672*61046927SAndroid Build Coastguard Worker return true;
673*61046927SAndroid Build Coastguard Worker }
674*61046927SAndroid Build Coastguard Worker
675*61046927SAndroid Build Coastguard Worker static bool
v3d_nir_lower_null_pointers(nir_shader * s)676*61046927SAndroid Build Coastguard Worker v3d_nir_lower_null_pointers(nir_shader *s)
677*61046927SAndroid Build Coastguard Worker {
678*61046927SAndroid Build Coastguard Worker return nir_shader_intrinsics_pass(s, v3d_nir_lower_null_pointers_cb,
679*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow, NULL);
680*61046927SAndroid Build Coastguard Worker }
681*61046927SAndroid Build Coastguard Worker
682*61046927SAndroid Build Coastguard Worker static unsigned
lower_bit_size_cb(const nir_instr * instr,void * _data)683*61046927SAndroid Build Coastguard Worker lower_bit_size_cb(const nir_instr *instr, void *_data)
684*61046927SAndroid Build Coastguard Worker {
685*61046927SAndroid Build Coastguard Worker if (instr->type != nir_instr_type_alu)
686*61046927SAndroid Build Coastguard Worker return 0;
687*61046927SAndroid Build Coastguard Worker
688*61046927SAndroid Build Coastguard Worker nir_alu_instr *alu = nir_instr_as_alu(instr);
689*61046927SAndroid Build Coastguard Worker
690*61046927SAndroid Build Coastguard Worker switch (alu->op) {
691*61046927SAndroid Build Coastguard Worker case nir_op_mov:
692*61046927SAndroid Build Coastguard Worker case nir_op_vec2:
693*61046927SAndroid Build Coastguard Worker case nir_op_vec3:
694*61046927SAndroid Build Coastguard Worker case nir_op_vec4:
695*61046927SAndroid Build Coastguard Worker case nir_op_vec5:
696*61046927SAndroid Build Coastguard Worker case nir_op_vec8:
697*61046927SAndroid Build Coastguard Worker case nir_op_vec16:
698*61046927SAndroid Build Coastguard Worker case nir_op_b2i8:
699*61046927SAndroid Build Coastguard Worker case nir_op_b2f16:
700*61046927SAndroid Build Coastguard Worker case nir_op_b2i16:
701*61046927SAndroid Build Coastguard Worker case nir_op_b2f32:
702*61046927SAndroid Build Coastguard Worker case nir_op_b2i32:
703*61046927SAndroid Build Coastguard Worker case nir_op_f2f16:
704*61046927SAndroid Build Coastguard Worker case nir_op_f2f16_rtne:
705*61046927SAndroid Build Coastguard Worker case nir_op_f2f16_rtz:
706*61046927SAndroid Build Coastguard Worker case nir_op_f2f32:
707*61046927SAndroid Build Coastguard Worker case nir_op_f2i32:
708*61046927SAndroid Build Coastguard Worker case nir_op_f2u32:
709*61046927SAndroid Build Coastguard Worker case nir_op_i2i8:
710*61046927SAndroid Build Coastguard Worker case nir_op_i2i16:
711*61046927SAndroid Build Coastguard Worker case nir_op_i2f16:
712*61046927SAndroid Build Coastguard Worker case nir_op_i2f32:
713*61046927SAndroid Build Coastguard Worker case nir_op_i2i32:
714*61046927SAndroid Build Coastguard Worker case nir_op_u2u8:
715*61046927SAndroid Build Coastguard Worker case nir_op_u2u16:
716*61046927SAndroid Build Coastguard Worker case nir_op_u2f16:
717*61046927SAndroid Build Coastguard Worker case nir_op_u2f32:
718*61046927SAndroid Build Coastguard Worker case nir_op_u2u32:
719*61046927SAndroid Build Coastguard Worker case nir_op_pack_32_2x16_split:
720*61046927SAndroid Build Coastguard Worker case nir_op_pack_32_4x8_split:
721*61046927SAndroid Build Coastguard Worker case nir_op_pack_half_2x16_split:
722*61046927SAndroid Build Coastguard Worker return 0;
723*61046927SAndroid Build Coastguard Worker
724*61046927SAndroid Build Coastguard Worker /* we need to handle those here as they only work with 32 bits */
725*61046927SAndroid Build Coastguard Worker default:
726*61046927SAndroid Build Coastguard Worker if (alu->src[0].src.ssa->bit_size != 1 && alu->src[0].src.ssa->bit_size < 32)
727*61046927SAndroid Build Coastguard Worker return 32;
728*61046927SAndroid Build Coastguard Worker return 0;
729*61046927SAndroid Build Coastguard Worker }
730*61046927SAndroid Build Coastguard Worker }
731*61046927SAndroid Build Coastguard Worker
732*61046927SAndroid Build Coastguard Worker static void
v3d_lower_nir(struct v3d_compile * c)733*61046927SAndroid Build Coastguard Worker v3d_lower_nir(struct v3d_compile *c)
734*61046927SAndroid Build Coastguard Worker {
735*61046927SAndroid Build Coastguard Worker struct nir_lower_tex_options tex_options = {
736*61046927SAndroid Build Coastguard Worker .lower_txd = true,
737*61046927SAndroid Build Coastguard Worker .lower_tg4_offsets = true,
738*61046927SAndroid Build Coastguard Worker .lower_tg4_broadcom_swizzle = true,
739*61046927SAndroid Build Coastguard Worker
740*61046927SAndroid Build Coastguard Worker .lower_rect = false, /* XXX: Use this on V3D 3.x */
741*61046927SAndroid Build Coastguard Worker .lower_txp = ~0,
742*61046927SAndroid Build Coastguard Worker /* Apply swizzles to all samplers. */
743*61046927SAndroid Build Coastguard Worker .swizzle_result = ~0,
744*61046927SAndroid Build Coastguard Worker .lower_invalid_implicit_lod = true,
745*61046927SAndroid Build Coastguard Worker };
746*61046927SAndroid Build Coastguard Worker
747*61046927SAndroid Build Coastguard Worker /* Lower the format swizzle and (for 32-bit returns)
748*61046927SAndroid Build Coastguard Worker * ARB_texture_swizzle-style swizzle.
749*61046927SAndroid Build Coastguard Worker */
750*61046927SAndroid Build Coastguard Worker assert(c->key->num_tex_used <= ARRAY_SIZE(c->key->tex));
751*61046927SAndroid Build Coastguard Worker for (int i = 0; i < c->key->num_tex_used; i++) {
752*61046927SAndroid Build Coastguard Worker for (int j = 0; j < 4; j++)
753*61046927SAndroid Build Coastguard Worker tex_options.swizzles[i][j] = c->key->tex[i].swizzle[j];
754*61046927SAndroid Build Coastguard Worker }
755*61046927SAndroid Build Coastguard Worker
756*61046927SAndroid Build Coastguard Worker tex_options.lower_tex_packing_cb = lower_tex_packing_cb;
757*61046927SAndroid Build Coastguard Worker tex_options.lower_tex_packing_data = c;
758*61046927SAndroid Build Coastguard Worker
759*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_tex, &tex_options);
760*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_system_values);
761*61046927SAndroid Build Coastguard Worker
762*61046927SAndroid Build Coastguard Worker if (c->s->info.zero_initialize_shared_memory &&
763*61046927SAndroid Build Coastguard Worker c->s->info.shared_size > 0) {
764*61046927SAndroid Build Coastguard Worker /* All our BOs allocate full pages, so the underlying allocation
765*61046927SAndroid Build Coastguard Worker * for shared memory will always be a multiple of 4KB. This
766*61046927SAndroid Build Coastguard Worker * ensures that we can do an exact number of full chunk_size
767*61046927SAndroid Build Coastguard Worker * writes to initialize the memory independently of the actual
768*61046927SAndroid Build Coastguard Worker * shared_size used by the shader, which is a requirement of
769*61046927SAndroid Build Coastguard Worker * the initialization pass.
770*61046927SAndroid Build Coastguard Worker */
771*61046927SAndroid Build Coastguard Worker const unsigned chunk_size = 16; /* max single store size */
772*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_zero_initialize_shared_memory,
773*61046927SAndroid Build Coastguard Worker align(c->s->info.shared_size, chunk_size), chunk_size);
774*61046927SAndroid Build Coastguard Worker }
775*61046927SAndroid Build Coastguard Worker
776*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_compute_system_values, NULL);
777*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_is_helper_invocation);
778*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_null_pointers);
779*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_bit_size, lower_bit_size_cb, NULL);
780*61046927SAndroid Build Coastguard Worker }
781*61046927SAndroid Build Coastguard Worker
782*61046927SAndroid Build Coastguard Worker static void
v3d_set_prog_data_uniforms(struct v3d_compile * c,struct v3d_prog_data * prog_data)783*61046927SAndroid Build Coastguard Worker v3d_set_prog_data_uniforms(struct v3d_compile *c,
784*61046927SAndroid Build Coastguard Worker struct v3d_prog_data *prog_data)
785*61046927SAndroid Build Coastguard Worker {
786*61046927SAndroid Build Coastguard Worker int count = c->num_uniforms;
787*61046927SAndroid Build Coastguard Worker struct v3d_uniform_list *ulist = &prog_data->uniforms;
788*61046927SAndroid Build Coastguard Worker
789*61046927SAndroid Build Coastguard Worker ulist->count = count;
790*61046927SAndroid Build Coastguard Worker ulist->data = ralloc_array(prog_data, uint32_t, count);
791*61046927SAndroid Build Coastguard Worker memcpy(ulist->data, c->uniform_data,
792*61046927SAndroid Build Coastguard Worker count * sizeof(*ulist->data));
793*61046927SAndroid Build Coastguard Worker ulist->contents = ralloc_array(prog_data, enum quniform_contents, count);
794*61046927SAndroid Build Coastguard Worker memcpy(ulist->contents, c->uniform_contents,
795*61046927SAndroid Build Coastguard Worker count * sizeof(*ulist->contents));
796*61046927SAndroid Build Coastguard Worker }
797*61046927SAndroid Build Coastguard Worker
798*61046927SAndroid Build Coastguard Worker static void
v3d_vs_set_prog_data(struct v3d_compile * c,struct v3d_vs_prog_data * prog_data)799*61046927SAndroid Build Coastguard Worker v3d_vs_set_prog_data(struct v3d_compile *c,
800*61046927SAndroid Build Coastguard Worker struct v3d_vs_prog_data *prog_data)
801*61046927SAndroid Build Coastguard Worker {
802*61046927SAndroid Build Coastguard Worker /* The vertex data gets format converted by the VPM so that
803*61046927SAndroid Build Coastguard Worker * each attribute channel takes up a VPM column. Precompute
804*61046927SAndroid Build Coastguard Worker * the sizes for the shader record.
805*61046927SAndroid Build Coastguard Worker */
806*61046927SAndroid Build Coastguard Worker for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) {
807*61046927SAndroid Build Coastguard Worker prog_data->vattr_sizes[i] = c->vattr_sizes[i];
808*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size += c->vattr_sizes[i];
809*61046927SAndroid Build Coastguard Worker }
810*61046927SAndroid Build Coastguard Worker
811*61046927SAndroid Build Coastguard Worker memset(prog_data->driver_location_map, -1,
812*61046927SAndroid Build Coastguard Worker sizeof(prog_data->driver_location_map));
813*61046927SAndroid Build Coastguard Worker
814*61046927SAndroid Build Coastguard Worker nir_foreach_shader_in_variable(var, c->s) {
815*61046927SAndroid Build Coastguard Worker prog_data->driver_location_map[var->data.location] =
816*61046927SAndroid Build Coastguard Worker var->data.driver_location;
817*61046927SAndroid Build Coastguard Worker }
818*61046927SAndroid Build Coastguard Worker
819*61046927SAndroid Build Coastguard Worker prog_data->uses_vid = BITSET_TEST(c->s->info.system_values_read,
820*61046927SAndroid Build Coastguard Worker SYSTEM_VALUE_VERTEX_ID) ||
821*61046927SAndroid Build Coastguard Worker BITSET_TEST(c->s->info.system_values_read,
822*61046927SAndroid Build Coastguard Worker SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
823*61046927SAndroid Build Coastguard Worker
824*61046927SAndroid Build Coastguard Worker prog_data->uses_biid = BITSET_TEST(c->s->info.system_values_read,
825*61046927SAndroid Build Coastguard Worker SYSTEM_VALUE_BASE_INSTANCE);
826*61046927SAndroid Build Coastguard Worker
827*61046927SAndroid Build Coastguard Worker prog_data->uses_iid = BITSET_TEST(c->s->info.system_values_read,
828*61046927SAndroid Build Coastguard Worker SYSTEM_VALUE_INSTANCE_ID) ||
829*61046927SAndroid Build Coastguard Worker BITSET_TEST(c->s->info.system_values_read,
830*61046927SAndroid Build Coastguard Worker SYSTEM_VALUE_INSTANCE_INDEX);
831*61046927SAndroid Build Coastguard Worker
832*61046927SAndroid Build Coastguard Worker if (prog_data->uses_vid)
833*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size++;
834*61046927SAndroid Build Coastguard Worker if (prog_data->uses_biid)
835*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size++;
836*61046927SAndroid Build Coastguard Worker if (prog_data->uses_iid)
837*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size++;
838*61046927SAndroid Build Coastguard Worker
839*61046927SAndroid Build Coastguard Worker prog_data->writes_psiz =
840*61046927SAndroid Build Coastguard Worker c->s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
841*61046927SAndroid Build Coastguard Worker
842*61046927SAndroid Build Coastguard Worker /* Input/output segment size are in sectors (8 rows of 32 bits per
843*61046927SAndroid Build Coastguard Worker * channel).
844*61046927SAndroid Build Coastguard Worker */
845*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
846*61046927SAndroid Build Coastguard Worker prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;
847*61046927SAndroid Build Coastguard Worker
848*61046927SAndroid Build Coastguard Worker /* Set us up for shared input/output segments. This is apparently
849*61046927SAndroid Build Coastguard Worker * necessary for our VCM setup to avoid varying corruption.
850*61046927SAndroid Build Coastguard Worker *
851*61046927SAndroid Build Coastguard Worker * FIXME: initial testing on V3D 7.1 seems to work fine when using
852*61046927SAndroid Build Coastguard Worker * separate segments. So we could try to reevaluate in the future, if
853*61046927SAndroid Build Coastguard Worker * there is any advantage of using separate segments.
854*61046927SAndroid Build Coastguard Worker */
855*61046927SAndroid Build Coastguard Worker prog_data->separate_segments = false;
856*61046927SAndroid Build Coastguard Worker prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
857*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size);
858*61046927SAndroid Build Coastguard Worker prog_data->vpm_input_size = 0;
859*61046927SAndroid Build Coastguard Worker
860*61046927SAndroid Build Coastguard Worker /* Compute VCM cache size. We set up our program to take up less than
861*61046927SAndroid Build Coastguard Worker * half of the VPM, so that any set of bin and render programs won't
862*61046927SAndroid Build Coastguard Worker * run out of space. We need space for at least one input segment,
863*61046927SAndroid Build Coastguard Worker * and then allocate the rest to output segments (one for the current
864*61046927SAndroid Build Coastguard Worker * program, the rest to VCM). The valid range of the VCM cache size
865*61046927SAndroid Build Coastguard Worker * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
866*61046927SAndroid Build Coastguard Worker * batches.
867*61046927SAndroid Build Coastguard Worker */
868*61046927SAndroid Build Coastguard Worker assert(c->devinfo->vpm_size);
869*61046927SAndroid Build Coastguard Worker int sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
870*61046927SAndroid Build Coastguard Worker int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
871*61046927SAndroid Build Coastguard Worker int half_vpm = vpm_size_in_sectors / 2;
872*61046927SAndroid Build Coastguard Worker int vpm_output_sectors = half_vpm - prog_data->vpm_input_size;
873*61046927SAndroid Build Coastguard Worker int vpm_output_batches = vpm_output_sectors / prog_data->vpm_output_size;
874*61046927SAndroid Build Coastguard Worker assert(vpm_output_batches >= 2);
875*61046927SAndroid Build Coastguard Worker prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
876*61046927SAndroid Build Coastguard Worker }
877*61046927SAndroid Build Coastguard Worker
878*61046927SAndroid Build Coastguard Worker static void
v3d_gs_set_prog_data(struct v3d_compile * c,struct v3d_gs_prog_data * prog_data)879*61046927SAndroid Build Coastguard Worker v3d_gs_set_prog_data(struct v3d_compile *c,
880*61046927SAndroid Build Coastguard Worker struct v3d_gs_prog_data *prog_data)
881*61046927SAndroid Build Coastguard Worker {
882*61046927SAndroid Build Coastguard Worker prog_data->num_inputs = c->num_inputs;
883*61046927SAndroid Build Coastguard Worker memcpy(prog_data->input_slots, c->input_slots,
884*61046927SAndroid Build Coastguard Worker c->num_inputs * sizeof(*c->input_slots));
885*61046927SAndroid Build Coastguard Worker
886*61046927SAndroid Build Coastguard Worker /* gl_PrimitiveIdIn is written by the GBG into the first word of the
887*61046927SAndroid Build Coastguard Worker * VPM output header automatically and the shader will overwrite
888*61046927SAndroid Build Coastguard Worker * it after reading it if necessary, so it doesn't add to the VPM
889*61046927SAndroid Build Coastguard Worker * size requirements.
890*61046927SAndroid Build Coastguard Worker */
891*61046927SAndroid Build Coastguard Worker prog_data->uses_pid = BITSET_TEST(c->s->info.system_values_read,
892*61046927SAndroid Build Coastguard Worker SYSTEM_VALUE_PRIMITIVE_ID);
893*61046927SAndroid Build Coastguard Worker
894*61046927SAndroid Build Coastguard Worker /* Output segment size is in sectors (8 rows of 32 bits per channel) */
895*61046927SAndroid Build Coastguard Worker prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;
896*61046927SAndroid Build Coastguard Worker
897*61046927SAndroid Build Coastguard Worker /* Compute SIMD dispatch width and update VPM output size accordingly
898*61046927SAndroid Build Coastguard Worker * to ensure we can fit our program in memory. Available widths are
899*61046927SAndroid Build Coastguard Worker * 16, 8, 4, 1.
900*61046927SAndroid Build Coastguard Worker *
901*61046927SAndroid Build Coastguard Worker * Notice that at draw time we will have to consider VPM memory
902*61046927SAndroid Build Coastguard Worker * requirements from other stages and choose a smaller dispatch
903*61046927SAndroid Build Coastguard Worker * width if needed to fit the program in VPM memory.
904*61046927SAndroid Build Coastguard Worker */
905*61046927SAndroid Build Coastguard Worker prog_data->simd_width = 16;
906*61046927SAndroid Build Coastguard Worker while ((prog_data->simd_width > 1 && prog_data->vpm_output_size > 16) ||
907*61046927SAndroid Build Coastguard Worker prog_data->simd_width == 2) {
908*61046927SAndroid Build Coastguard Worker prog_data->simd_width >>= 1;
909*61046927SAndroid Build Coastguard Worker prog_data->vpm_output_size =
910*61046927SAndroid Build Coastguard Worker align(prog_data->vpm_output_size, 2) / 2;
911*61046927SAndroid Build Coastguard Worker }
912*61046927SAndroid Build Coastguard Worker assert(prog_data->vpm_output_size <= 16);
913*61046927SAndroid Build Coastguard Worker assert(prog_data->simd_width != 2);
914*61046927SAndroid Build Coastguard Worker
915*61046927SAndroid Build Coastguard Worker prog_data->out_prim_type = c->s->info.gs.output_primitive;
916*61046927SAndroid Build Coastguard Worker prog_data->num_invocations = c->s->info.gs.invocations;
917*61046927SAndroid Build Coastguard Worker
918*61046927SAndroid Build Coastguard Worker prog_data->writes_psiz =
919*61046927SAndroid Build Coastguard Worker c->s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);
920*61046927SAndroid Build Coastguard Worker }
921*61046927SAndroid Build Coastguard Worker
922*61046927SAndroid Build Coastguard Worker static void
v3d_set_fs_prog_data_inputs(struct v3d_compile * c,struct v3d_fs_prog_data * prog_data)923*61046927SAndroid Build Coastguard Worker v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
924*61046927SAndroid Build Coastguard Worker struct v3d_fs_prog_data *prog_data)
925*61046927SAndroid Build Coastguard Worker {
926*61046927SAndroid Build Coastguard Worker prog_data->num_inputs = c->num_inputs;
927*61046927SAndroid Build Coastguard Worker memcpy(prog_data->input_slots, c->input_slots,
928*61046927SAndroid Build Coastguard Worker c->num_inputs * sizeof(*c->input_slots));
929*61046927SAndroid Build Coastguard Worker
930*61046927SAndroid Build Coastguard Worker STATIC_ASSERT(ARRAY_SIZE(prog_data->flat_shade_flags) >
931*61046927SAndroid Build Coastguard Worker (V3D_MAX_FS_INPUTS - 1) / 24);
932*61046927SAndroid Build Coastguard Worker for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
933*61046927SAndroid Build Coastguard Worker if (BITSET_TEST(c->flat_shade_flags, i))
934*61046927SAndroid Build Coastguard Worker prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
935*61046927SAndroid Build Coastguard Worker
936*61046927SAndroid Build Coastguard Worker if (BITSET_TEST(c->noperspective_flags, i))
937*61046927SAndroid Build Coastguard Worker prog_data->noperspective_flags[i / 24] |= 1 << (i % 24);
938*61046927SAndroid Build Coastguard Worker
939*61046927SAndroid Build Coastguard Worker if (BITSET_TEST(c->centroid_flags, i))
940*61046927SAndroid Build Coastguard Worker prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
941*61046927SAndroid Build Coastguard Worker }
942*61046927SAndroid Build Coastguard Worker }
943*61046927SAndroid Build Coastguard Worker
944*61046927SAndroid Build Coastguard Worker static void
v3d_fs_set_prog_data(struct v3d_compile * c,struct v3d_fs_prog_data * prog_data)945*61046927SAndroid Build Coastguard Worker v3d_fs_set_prog_data(struct v3d_compile *c,
946*61046927SAndroid Build Coastguard Worker struct v3d_fs_prog_data *prog_data)
947*61046927SAndroid Build Coastguard Worker {
948*61046927SAndroid Build Coastguard Worker v3d_set_fs_prog_data_inputs(c, prog_data);
949*61046927SAndroid Build Coastguard Worker prog_data->writes_z = c->writes_z;
950*61046927SAndroid Build Coastguard Worker prog_data->writes_z_from_fep = c->writes_z_from_fep;
951*61046927SAndroid Build Coastguard Worker prog_data->disable_ez = !c->s->info.fs.early_fragment_tests;
952*61046927SAndroid Build Coastguard Worker prog_data->uses_center_w = c->uses_center_w;
953*61046927SAndroid Build Coastguard Worker prog_data->uses_implicit_point_line_varyings =
954*61046927SAndroid Build Coastguard Worker c->uses_implicit_point_line_varyings;
955*61046927SAndroid Build Coastguard Worker prog_data->lock_scoreboard_on_first_thrsw =
956*61046927SAndroid Build Coastguard Worker c->lock_scoreboard_on_first_thrsw;
957*61046927SAndroid Build Coastguard Worker prog_data->force_per_sample_msaa = c->s->info.fs.uses_sample_shading;
958*61046927SAndroid Build Coastguard Worker prog_data->uses_pid = c->fs_uses_primitive_id;
959*61046927SAndroid Build Coastguard Worker }
960*61046927SAndroid Build Coastguard Worker
961*61046927SAndroid Build Coastguard Worker static void
v3d_cs_set_prog_data(struct v3d_compile * c,struct v3d_compute_prog_data * prog_data)962*61046927SAndroid Build Coastguard Worker v3d_cs_set_prog_data(struct v3d_compile *c,
963*61046927SAndroid Build Coastguard Worker struct v3d_compute_prog_data *prog_data)
964*61046927SAndroid Build Coastguard Worker {
965*61046927SAndroid Build Coastguard Worker prog_data->shared_size = c->s->info.shared_size;
966*61046927SAndroid Build Coastguard Worker
967*61046927SAndroid Build Coastguard Worker prog_data->local_size[0] = c->s->info.workgroup_size[0];
968*61046927SAndroid Build Coastguard Worker prog_data->local_size[1] = c->s->info.workgroup_size[1];
969*61046927SAndroid Build Coastguard Worker prog_data->local_size[2] = c->s->info.workgroup_size[2];
970*61046927SAndroid Build Coastguard Worker
971*61046927SAndroid Build Coastguard Worker prog_data->has_subgroups = c->has_subgroups;
972*61046927SAndroid Build Coastguard Worker }
973*61046927SAndroid Build Coastguard Worker
974*61046927SAndroid Build Coastguard Worker static void
v3d_set_prog_data(struct v3d_compile * c,struct v3d_prog_data * prog_data)975*61046927SAndroid Build Coastguard Worker v3d_set_prog_data(struct v3d_compile *c,
976*61046927SAndroid Build Coastguard Worker struct v3d_prog_data *prog_data)
977*61046927SAndroid Build Coastguard Worker {
978*61046927SAndroid Build Coastguard Worker prog_data->threads = c->threads;
979*61046927SAndroid Build Coastguard Worker prog_data->single_seg = !c->last_thrsw;
980*61046927SAndroid Build Coastguard Worker prog_data->spill_size = c->spill_size;
981*61046927SAndroid Build Coastguard Worker prog_data->tmu_spills = c->spills;
982*61046927SAndroid Build Coastguard Worker prog_data->tmu_fills = c->fills;
983*61046927SAndroid Build Coastguard Worker prog_data->tmu_count = c->tmu.total_count;
984*61046927SAndroid Build Coastguard Worker prog_data->qpu_read_stalls = c->qpu_inst_stalled_count;
985*61046927SAndroid Build Coastguard Worker prog_data->compile_strategy_idx = c->compile_strategy_idx;
986*61046927SAndroid Build Coastguard Worker prog_data->tmu_dirty_rcl = c->tmu_dirty_rcl;
987*61046927SAndroid Build Coastguard Worker prog_data->has_control_barrier = c->s->info.uses_control_barrier;
988*61046927SAndroid Build Coastguard Worker prog_data->has_global_address = c->has_global_address;
989*61046927SAndroid Build Coastguard Worker
990*61046927SAndroid Build Coastguard Worker v3d_set_prog_data_uniforms(c, prog_data);
991*61046927SAndroid Build Coastguard Worker
992*61046927SAndroid Build Coastguard Worker switch (c->s->info.stage) {
993*61046927SAndroid Build Coastguard Worker case MESA_SHADER_VERTEX:
994*61046927SAndroid Build Coastguard Worker v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
995*61046927SAndroid Build Coastguard Worker break;
996*61046927SAndroid Build Coastguard Worker case MESA_SHADER_GEOMETRY:
997*61046927SAndroid Build Coastguard Worker v3d_gs_set_prog_data(c, (struct v3d_gs_prog_data *)prog_data);
998*61046927SAndroid Build Coastguard Worker break;
999*61046927SAndroid Build Coastguard Worker case MESA_SHADER_FRAGMENT:
1000*61046927SAndroid Build Coastguard Worker v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
1001*61046927SAndroid Build Coastguard Worker break;
1002*61046927SAndroid Build Coastguard Worker case MESA_SHADER_COMPUTE:
1003*61046927SAndroid Build Coastguard Worker v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
1004*61046927SAndroid Build Coastguard Worker break;
1005*61046927SAndroid Build Coastguard Worker default:
1006*61046927SAndroid Build Coastguard Worker unreachable("unsupported shader stage");
1007*61046927SAndroid Build Coastguard Worker }
1008*61046927SAndroid Build Coastguard Worker }
1009*61046927SAndroid Build Coastguard Worker
1010*61046927SAndroid Build Coastguard Worker static uint64_t *
v3d_return_qpu_insts(struct v3d_compile * c,uint32_t * final_assembly_size)1011*61046927SAndroid Build Coastguard Worker v3d_return_qpu_insts(struct v3d_compile *c, uint32_t *final_assembly_size)
1012*61046927SAndroid Build Coastguard Worker {
1013*61046927SAndroid Build Coastguard Worker *final_assembly_size = c->qpu_inst_count * sizeof(uint64_t);
1014*61046927SAndroid Build Coastguard Worker
1015*61046927SAndroid Build Coastguard Worker uint64_t *qpu_insts = malloc(*final_assembly_size);
1016*61046927SAndroid Build Coastguard Worker if (!qpu_insts)
1017*61046927SAndroid Build Coastguard Worker return NULL;
1018*61046927SAndroid Build Coastguard Worker
1019*61046927SAndroid Build Coastguard Worker memcpy(qpu_insts, c->qpu_insts, *final_assembly_size);
1020*61046927SAndroid Build Coastguard Worker
1021*61046927SAndroid Build Coastguard Worker vir_compile_destroy(c);
1022*61046927SAndroid Build Coastguard Worker
1023*61046927SAndroid Build Coastguard Worker return qpu_insts;
1024*61046927SAndroid Build Coastguard Worker }
1025*61046927SAndroid Build Coastguard Worker
1026*61046927SAndroid Build Coastguard Worker static void
v3d_nir_lower_vs_early(struct v3d_compile * c)1027*61046927SAndroid Build Coastguard Worker v3d_nir_lower_vs_early(struct v3d_compile *c)
1028*61046927SAndroid Build Coastguard Worker {
1029*61046927SAndroid Build Coastguard Worker /* Split our I/O vars and dead code eliminate the unused
1030*61046927SAndroid Build Coastguard Worker * components.
1031*61046927SAndroid Build Coastguard Worker */
1032*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_io_to_scalar_early,
1033*61046927SAndroid Build Coastguard Worker nir_var_shader_in | nir_var_shader_out);
1034*61046927SAndroid Build Coastguard Worker uint64_t used_outputs[4] = {0};
1035*61046927SAndroid Build Coastguard Worker for (int i = 0; i < c->vs_key->num_used_outputs; i++) {
1036*61046927SAndroid Build Coastguard Worker int slot = v3d_slot_get_slot(c->vs_key->used_outputs[i]);
1037*61046927SAndroid Build Coastguard Worker int comp = v3d_slot_get_component(c->vs_key->used_outputs[i]);
1038*61046927SAndroid Build Coastguard Worker used_outputs[comp] |= 1ull << slot;
1039*61046927SAndroid Build Coastguard Worker }
1040*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_remove_unused_io_vars,
1041*61046927SAndroid Build Coastguard Worker nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
1042*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
1043*61046927SAndroid Build Coastguard Worker v3d_optimize_nir(c, c->s);
1044*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
1045*61046927SAndroid Build Coastguard Worker
1046*61046927SAndroid Build Coastguard Worker /* This must go before nir_lower_io */
1047*61046927SAndroid Build Coastguard Worker if (c->vs_key->per_vertex_point_size)
1048*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_point_size, 1.0f, 0.0f);
1049*61046927SAndroid Build Coastguard Worker
1050*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
1051*61046927SAndroid Build Coastguard Worker type_size_vec4,
1052*61046927SAndroid Build Coastguard Worker (nir_lower_io_options)0);
1053*61046927SAndroid Build Coastguard Worker /* clean up nir_lower_io's deref_var remains and do a constant folding pass
1054*61046927SAndroid Build Coastguard Worker * on the code it generated.
1055*61046927SAndroid Build Coastguard Worker */
1056*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_dce);
1057*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_constant_folding);
1058*61046927SAndroid Build Coastguard Worker }
1059*61046927SAndroid Build Coastguard Worker
1060*61046927SAndroid Build Coastguard Worker static void
v3d_nir_lower_gs_early(struct v3d_compile * c)1061*61046927SAndroid Build Coastguard Worker v3d_nir_lower_gs_early(struct v3d_compile *c)
1062*61046927SAndroid Build Coastguard Worker {
1063*61046927SAndroid Build Coastguard Worker /* Split our I/O vars and dead code eliminate the unused
1064*61046927SAndroid Build Coastguard Worker * components.
1065*61046927SAndroid Build Coastguard Worker */
1066*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_io_to_scalar_early,
1067*61046927SAndroid Build Coastguard Worker nir_var_shader_in | nir_var_shader_out);
1068*61046927SAndroid Build Coastguard Worker uint64_t used_outputs[4] = {0};
1069*61046927SAndroid Build Coastguard Worker for (int i = 0; i < c->gs_key->num_used_outputs; i++) {
1070*61046927SAndroid Build Coastguard Worker int slot = v3d_slot_get_slot(c->gs_key->used_outputs[i]);
1071*61046927SAndroid Build Coastguard Worker int comp = v3d_slot_get_component(c->gs_key->used_outputs[i]);
1072*61046927SAndroid Build Coastguard Worker used_outputs[comp] |= 1ull << slot;
1073*61046927SAndroid Build Coastguard Worker }
1074*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_remove_unused_io_vars,
1075*61046927SAndroid Build Coastguard Worker nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
1076*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
1077*61046927SAndroid Build Coastguard Worker v3d_optimize_nir(c, c->s);
1078*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
1079*61046927SAndroid Build Coastguard Worker
1080*61046927SAndroid Build Coastguard Worker /* This must go before nir_lower_io */
1081*61046927SAndroid Build Coastguard Worker if (c->gs_key->per_vertex_point_size)
1082*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_point_size, 1.0f, 0.0f);
1083*61046927SAndroid Build Coastguard Worker
1084*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
1085*61046927SAndroid Build Coastguard Worker type_size_vec4,
1086*61046927SAndroid Build Coastguard Worker (nir_lower_io_options)0);
1087*61046927SAndroid Build Coastguard Worker /* clean up nir_lower_io's deref_var remains and do a constant folding pass
1088*61046927SAndroid Build Coastguard Worker * on the code it generated.
1089*61046927SAndroid Build Coastguard Worker */
1090*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_dce);
1091*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_constant_folding);
1092*61046927SAndroid Build Coastguard Worker }
1093*61046927SAndroid Build Coastguard Worker
1094*61046927SAndroid Build Coastguard Worker static void
v3d_fixup_fs_output_types(struct v3d_compile * c)1095*61046927SAndroid Build Coastguard Worker v3d_fixup_fs_output_types(struct v3d_compile *c)
1096*61046927SAndroid Build Coastguard Worker {
1097*61046927SAndroid Build Coastguard Worker nir_foreach_shader_out_variable(var, c->s) {
1098*61046927SAndroid Build Coastguard Worker uint32_t mask = 0;
1099*61046927SAndroid Build Coastguard Worker
1100*61046927SAndroid Build Coastguard Worker switch (var->data.location) {
1101*61046927SAndroid Build Coastguard Worker case FRAG_RESULT_COLOR:
1102*61046927SAndroid Build Coastguard Worker mask = ~0;
1103*61046927SAndroid Build Coastguard Worker break;
1104*61046927SAndroid Build Coastguard Worker case FRAG_RESULT_DATA0:
1105*61046927SAndroid Build Coastguard Worker case FRAG_RESULT_DATA1:
1106*61046927SAndroid Build Coastguard Worker case FRAG_RESULT_DATA2:
1107*61046927SAndroid Build Coastguard Worker case FRAG_RESULT_DATA3:
1108*61046927SAndroid Build Coastguard Worker mask = 1 << (var->data.location - FRAG_RESULT_DATA0);
1109*61046927SAndroid Build Coastguard Worker break;
1110*61046927SAndroid Build Coastguard Worker }
1111*61046927SAndroid Build Coastguard Worker
1112*61046927SAndroid Build Coastguard Worker if (c->fs_key->int_color_rb & mask) {
1113*61046927SAndroid Build Coastguard Worker var->type =
1114*61046927SAndroid Build Coastguard Worker glsl_vector_type(GLSL_TYPE_INT,
1115*61046927SAndroid Build Coastguard Worker glsl_get_components(var->type));
1116*61046927SAndroid Build Coastguard Worker } else if (c->fs_key->uint_color_rb & mask) {
1117*61046927SAndroid Build Coastguard Worker var->type =
1118*61046927SAndroid Build Coastguard Worker glsl_vector_type(GLSL_TYPE_UINT,
1119*61046927SAndroid Build Coastguard Worker glsl_get_components(var->type));
1120*61046927SAndroid Build Coastguard Worker }
1121*61046927SAndroid Build Coastguard Worker }
1122*61046927SAndroid Build Coastguard Worker }
1123*61046927SAndroid Build Coastguard Worker
1124*61046927SAndroid Build Coastguard Worker static void
v3d_nir_lower_fs_early(struct v3d_compile * c)1125*61046927SAndroid Build Coastguard Worker v3d_nir_lower_fs_early(struct v3d_compile *c)
1126*61046927SAndroid Build Coastguard Worker {
1127*61046927SAndroid Build Coastguard Worker if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
1128*61046927SAndroid Build Coastguard Worker v3d_fixup_fs_output_types(c);
1129*61046927SAndroid Build Coastguard Worker
1130*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c);
1131*61046927SAndroid Build Coastguard Worker
1132*61046927SAndroid Build Coastguard Worker if (c->fs_key->line_smoothing) {
1133*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_line_smooth);
1134*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
1135*61046927SAndroid Build Coastguard Worker /* The lowering pass can introduce new sysval reads */
1136*61046927SAndroid Build Coastguard Worker nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s));
1137*61046927SAndroid Build Coastguard Worker }
1138*61046927SAndroid Build Coastguard Worker }
1139*61046927SAndroid Build Coastguard Worker
1140*61046927SAndroid Build Coastguard Worker static void
v3d_nir_lower_gs_late(struct v3d_compile * c)1141*61046927SAndroid Build Coastguard Worker v3d_nir_lower_gs_late(struct v3d_compile *c)
1142*61046927SAndroid Build Coastguard Worker {
1143*61046927SAndroid Build Coastguard Worker if (c->key->ucp_enables) {
1144*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_clip_gs, c->key->ucp_enables,
1145*61046927SAndroid Build Coastguard Worker true, NULL);
1146*61046927SAndroid Build Coastguard Worker }
1147*61046927SAndroid Build Coastguard Worker
1148*61046927SAndroid Build Coastguard Worker /* Note: GS output scalarizing must happen after nir_lower_clip_gs. */
1149*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
1150*61046927SAndroid Build Coastguard Worker }
1151*61046927SAndroid Build Coastguard Worker
1152*61046927SAndroid Build Coastguard Worker static void
v3d_nir_lower_vs_late(struct v3d_compile * c)1153*61046927SAndroid Build Coastguard Worker v3d_nir_lower_vs_late(struct v3d_compile *c)
1154*61046927SAndroid Build Coastguard Worker {
1155*61046927SAndroid Build Coastguard Worker if (c->key->ucp_enables) {
1156*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_clip_vs, c->key->ucp_enables,
1157*61046927SAndroid Build Coastguard Worker false, true, NULL);
1158*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_lower_io_to_scalar,
1159*61046927SAndroid Build Coastguard Worker nir_var_shader_out, NULL, NULL);
1160*61046927SAndroid Build Coastguard Worker }
1161*61046927SAndroid Build Coastguard Worker
1162*61046927SAndroid Build Coastguard Worker /* Note: VS output scalarizing must happen after nir_lower_clip_vs. */
1163*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
1164*61046927SAndroid Build Coastguard Worker }
1165*61046927SAndroid Build Coastguard Worker
1166*61046927SAndroid Build Coastguard Worker static void
v3d_nir_lower_fs_late(struct v3d_compile * c)1167*61046927SAndroid Build Coastguard Worker v3d_nir_lower_fs_late(struct v3d_compile *c)
1168*61046927SAndroid Build Coastguard Worker {
1169*61046927SAndroid Build Coastguard Worker /* In OpenGL the fragment shader can't read gl_ClipDistance[], but
1170*61046927SAndroid Build Coastguard Worker * Vulkan allows it, in which case the SPIR-V compiler will declare
1171*61046927SAndroid Build Coastguard Worker * VARING_SLOT_CLIP_DIST0 as compact array variable. Pass true as
1172*61046927SAndroid Build Coastguard Worker * the last parameter to always operate with a compact array in both
1173*61046927SAndroid Build Coastguard Worker * OpenGL and Vulkan so we do't have to care about the API we
1174*61046927SAndroid Build Coastguard Worker * are using.
1175*61046927SAndroid Build Coastguard Worker */
1176*61046927SAndroid Build Coastguard Worker if (c->key->ucp_enables)
1177*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_clip_fs, c->key->ucp_enables, true);
1178*61046927SAndroid Build Coastguard Worker
1179*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL);
1180*61046927SAndroid Build Coastguard Worker }
1181*61046927SAndroid Build Coastguard Worker
1182*61046927SAndroid Build Coastguard Worker static uint32_t
vir_get_max_temps(struct v3d_compile * c)1183*61046927SAndroid Build Coastguard Worker vir_get_max_temps(struct v3d_compile *c)
1184*61046927SAndroid Build Coastguard Worker {
1185*61046927SAndroid Build Coastguard Worker int max_ip = 0;
1186*61046927SAndroid Build Coastguard Worker vir_for_each_inst_inorder(inst, c)
1187*61046927SAndroid Build Coastguard Worker max_ip++;
1188*61046927SAndroid Build Coastguard Worker
1189*61046927SAndroid Build Coastguard Worker uint32_t *pressure = rzalloc_array(NULL, uint32_t, max_ip);
1190*61046927SAndroid Build Coastguard Worker
1191*61046927SAndroid Build Coastguard Worker for (int t = 0; t < c->num_temps; t++) {
1192*61046927SAndroid Build Coastguard Worker for (int i = c->temp_start[t]; (i < c->temp_end[t] &&
1193*61046927SAndroid Build Coastguard Worker i < max_ip); i++) {
1194*61046927SAndroid Build Coastguard Worker if (i > max_ip)
1195*61046927SAndroid Build Coastguard Worker break;
1196*61046927SAndroid Build Coastguard Worker pressure[i]++;
1197*61046927SAndroid Build Coastguard Worker }
1198*61046927SAndroid Build Coastguard Worker }
1199*61046927SAndroid Build Coastguard Worker
1200*61046927SAndroid Build Coastguard Worker uint32_t max_temps = 0;
1201*61046927SAndroid Build Coastguard Worker for (int i = 0; i < max_ip; i++)
1202*61046927SAndroid Build Coastguard Worker max_temps = MAX2(max_temps, pressure[i]);
1203*61046927SAndroid Build Coastguard Worker
1204*61046927SAndroid Build Coastguard Worker ralloc_free(pressure);
1205*61046927SAndroid Build Coastguard Worker
1206*61046927SAndroid Build Coastguard Worker return max_temps;
1207*61046927SAndroid Build Coastguard Worker }
1208*61046927SAndroid Build Coastguard Worker
1209*61046927SAndroid Build Coastguard Worker enum v3d_dependency_class {
1210*61046927SAndroid Build Coastguard Worker V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0
1211*61046927SAndroid Build Coastguard Worker };
1212*61046927SAndroid Build Coastguard Worker
1213*61046927SAndroid Build Coastguard Worker static bool
v3d_intrinsic_dependency_cb(nir_intrinsic_instr * intr,nir_schedule_dependency * dep,void * user_data)1214*61046927SAndroid Build Coastguard Worker v3d_intrinsic_dependency_cb(nir_intrinsic_instr *intr,
1215*61046927SAndroid Build Coastguard Worker nir_schedule_dependency *dep,
1216*61046927SAndroid Build Coastguard Worker void *user_data)
1217*61046927SAndroid Build Coastguard Worker {
1218*61046927SAndroid Build Coastguard Worker struct v3d_compile *c = user_data;
1219*61046927SAndroid Build Coastguard Worker
1220*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
1221*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_output:
1222*61046927SAndroid Build Coastguard Worker /* Writing to location 0 overwrites the value passed in for
1223*61046927SAndroid Build Coastguard Worker * gl_PrimitiveID on geometry shaders
1224*61046927SAndroid Build Coastguard Worker */
1225*61046927SAndroid Build Coastguard Worker if (c->s->info.stage != MESA_SHADER_GEOMETRY ||
1226*61046927SAndroid Build Coastguard Worker nir_intrinsic_base(intr) != 0)
1227*61046927SAndroid Build Coastguard Worker break;
1228*61046927SAndroid Build Coastguard Worker
1229*61046927SAndroid Build Coastguard Worker nir_const_value *const_value =
1230*61046927SAndroid Build Coastguard Worker nir_src_as_const_value(intr->src[1]);
1231*61046927SAndroid Build Coastguard Worker
1232*61046927SAndroid Build Coastguard Worker if (const_value == NULL)
1233*61046927SAndroid Build Coastguard Worker break;
1234*61046927SAndroid Build Coastguard Worker
1235*61046927SAndroid Build Coastguard Worker uint64_t offset =
1236*61046927SAndroid Build Coastguard Worker nir_const_value_as_uint(*const_value,
1237*61046927SAndroid Build Coastguard Worker nir_src_bit_size(intr->src[1]));
1238*61046927SAndroid Build Coastguard Worker if (offset != 0)
1239*61046927SAndroid Build Coastguard Worker break;
1240*61046927SAndroid Build Coastguard Worker
1241*61046927SAndroid Build Coastguard Worker dep->klass = V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0;
1242*61046927SAndroid Build Coastguard Worker dep->type = NIR_SCHEDULE_WRITE_DEPENDENCY;
1243*61046927SAndroid Build Coastguard Worker return true;
1244*61046927SAndroid Build Coastguard Worker
1245*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_primitive_id:
1246*61046927SAndroid Build Coastguard Worker if (c->s->info.stage != MESA_SHADER_GEOMETRY)
1247*61046927SAndroid Build Coastguard Worker break;
1248*61046927SAndroid Build Coastguard Worker
1249*61046927SAndroid Build Coastguard Worker dep->klass = V3D_DEPENDENCY_CLASS_GS_VPM_OUTPUT_0;
1250*61046927SAndroid Build Coastguard Worker dep->type = NIR_SCHEDULE_READ_DEPENDENCY;
1251*61046927SAndroid Build Coastguard Worker return true;
1252*61046927SAndroid Build Coastguard Worker
1253*61046927SAndroid Build Coastguard Worker default:
1254*61046927SAndroid Build Coastguard Worker break;
1255*61046927SAndroid Build Coastguard Worker }
1256*61046927SAndroid Build Coastguard Worker
1257*61046927SAndroid Build Coastguard Worker return false;
1258*61046927SAndroid Build Coastguard Worker }
1259*61046927SAndroid Build Coastguard Worker
1260*61046927SAndroid Build Coastguard Worker static unsigned
v3d_instr_delay_cb(nir_instr * instr,void * data)1261*61046927SAndroid Build Coastguard Worker v3d_instr_delay_cb(nir_instr *instr, void *data)
1262*61046927SAndroid Build Coastguard Worker {
1263*61046927SAndroid Build Coastguard Worker struct v3d_compile *c = (struct v3d_compile *) data;
1264*61046927SAndroid Build Coastguard Worker
1265*61046927SAndroid Build Coastguard Worker switch (instr->type) {
1266*61046927SAndroid Build Coastguard Worker case nir_instr_type_undef:
1267*61046927SAndroid Build Coastguard Worker case nir_instr_type_load_const:
1268*61046927SAndroid Build Coastguard Worker case nir_instr_type_alu:
1269*61046927SAndroid Build Coastguard Worker case nir_instr_type_deref:
1270*61046927SAndroid Build Coastguard Worker case nir_instr_type_jump:
1271*61046927SAndroid Build Coastguard Worker case nir_instr_type_parallel_copy:
1272*61046927SAndroid Build Coastguard Worker case nir_instr_type_call:
1273*61046927SAndroid Build Coastguard Worker case nir_instr_type_phi:
1274*61046927SAndroid Build Coastguard Worker return 1;
1275*61046927SAndroid Build Coastguard Worker
1276*61046927SAndroid Build Coastguard Worker /* We should not use very large delays for TMU instructions. Typically,
1277*61046927SAndroid Build Coastguard Worker * thread switches will be sufficient to hide all or most of the latency,
1278*61046927SAndroid Build Coastguard Worker * so we typically only need a little bit of extra room. If we over-estimate
1279*61046927SAndroid Build Coastguard Worker * the latency here we may end up unnecessarily delaying the critical path in
1280*61046927SAndroid Build Coastguard Worker * the shader, which would have a negative effect in performance, so here
1281*61046927SAndroid Build Coastguard Worker * we are trying to strike a balance based on empirical testing.
1282*61046927SAndroid Build Coastguard Worker */
1283*61046927SAndroid Build Coastguard Worker case nir_instr_type_intrinsic: {
1284*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1285*61046927SAndroid Build Coastguard Worker if (!c->disable_general_tmu_sched) {
1286*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
1287*61046927SAndroid Build Coastguard Worker case nir_intrinsic_decl_reg:
1288*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_reg:
1289*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_reg:
1290*61046927SAndroid Build Coastguard Worker return 0;
1291*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ssbo:
1292*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_scratch:
1293*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_shared:
1294*61046927SAndroid Build Coastguard Worker case nir_intrinsic_image_load:
1295*61046927SAndroid Build Coastguard Worker return 3;
1296*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_ubo:
1297*61046927SAndroid Build Coastguard Worker if (nir_src_is_divergent(intr->src[1]))
1298*61046927SAndroid Build Coastguard Worker return 3;
1299*61046927SAndroid Build Coastguard Worker FALLTHROUGH;
1300*61046927SAndroid Build Coastguard Worker default:
1301*61046927SAndroid Build Coastguard Worker return 1;
1302*61046927SAndroid Build Coastguard Worker }
1303*61046927SAndroid Build Coastguard Worker } else {
1304*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
1305*61046927SAndroid Build Coastguard Worker case nir_intrinsic_decl_reg:
1306*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_reg:
1307*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_reg:
1308*61046927SAndroid Build Coastguard Worker return 0;
1309*61046927SAndroid Build Coastguard Worker default:
1310*61046927SAndroid Build Coastguard Worker return 1;
1311*61046927SAndroid Build Coastguard Worker }
1312*61046927SAndroid Build Coastguard Worker }
1313*61046927SAndroid Build Coastguard Worker break;
1314*61046927SAndroid Build Coastguard Worker }
1315*61046927SAndroid Build Coastguard Worker
1316*61046927SAndroid Build Coastguard Worker case nir_instr_type_tex:
1317*61046927SAndroid Build Coastguard Worker return 5;
1318*61046927SAndroid Build Coastguard Worker
1319*61046927SAndroid Build Coastguard Worker case nir_instr_type_debug_info:
1320*61046927SAndroid Build Coastguard Worker return 0;
1321*61046927SAndroid Build Coastguard Worker }
1322*61046927SAndroid Build Coastguard Worker
1323*61046927SAndroid Build Coastguard Worker return 0;
1324*61046927SAndroid Build Coastguard Worker }
1325*61046927SAndroid Build Coastguard Worker
1326*61046927SAndroid Build Coastguard Worker static bool
should_split_wrmask(const nir_instr * instr,const void * data)1327*61046927SAndroid Build Coastguard Worker should_split_wrmask(const nir_instr *instr, const void *data)
1328*61046927SAndroid Build Coastguard Worker {
1329*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1330*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
1331*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_ssbo:
1332*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_shared:
1333*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_global:
1334*61046927SAndroid Build Coastguard Worker case nir_intrinsic_store_scratch:
1335*61046927SAndroid Build Coastguard Worker return true;
1336*61046927SAndroid Build Coastguard Worker default:
1337*61046927SAndroid Build Coastguard Worker return false;
1338*61046927SAndroid Build Coastguard Worker }
1339*61046927SAndroid Build Coastguard Worker }
1340*61046927SAndroid Build Coastguard Worker
1341*61046927SAndroid Build Coastguard Worker static nir_intrinsic_instr *
nir_instr_as_constant_ubo_load(nir_instr * inst)1342*61046927SAndroid Build Coastguard Worker nir_instr_as_constant_ubo_load(nir_instr *inst)
1343*61046927SAndroid Build Coastguard Worker {
1344*61046927SAndroid Build Coastguard Worker if (inst->type != nir_instr_type_intrinsic)
1345*61046927SAndroid Build Coastguard Worker return NULL;
1346*61046927SAndroid Build Coastguard Worker
1347*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(inst);
1348*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_load_ubo)
1349*61046927SAndroid Build Coastguard Worker return NULL;
1350*61046927SAndroid Build Coastguard Worker
1351*61046927SAndroid Build Coastguard Worker assert(nir_src_is_const(intr->src[0]));
1352*61046927SAndroid Build Coastguard Worker if (!nir_src_is_const(intr->src[1]))
1353*61046927SAndroid Build Coastguard Worker return NULL;
1354*61046927SAndroid Build Coastguard Worker
1355*61046927SAndroid Build Coastguard Worker return intr;
1356*61046927SAndroid Build Coastguard Worker }
1357*61046927SAndroid Build Coastguard Worker
1358*61046927SAndroid Build Coastguard Worker static bool
v3d_nir_sort_constant_ubo_load(nir_block * block,nir_intrinsic_instr * ref)1359*61046927SAndroid Build Coastguard Worker v3d_nir_sort_constant_ubo_load(nir_block *block, nir_intrinsic_instr *ref)
1360*61046927SAndroid Build Coastguard Worker {
1361*61046927SAndroid Build Coastguard Worker bool progress = false;
1362*61046927SAndroid Build Coastguard Worker
1363*61046927SAndroid Build Coastguard Worker nir_instr *ref_inst = &ref->instr;
1364*61046927SAndroid Build Coastguard Worker uint32_t ref_offset = nir_src_as_uint(ref->src[1]);
1365*61046927SAndroid Build Coastguard Worker uint32_t ref_index = nir_src_as_uint(ref->src[0]);
1366*61046927SAndroid Build Coastguard Worker
1367*61046927SAndroid Build Coastguard Worker /* Go through all instructions after ref searching for constant UBO
1368*61046927SAndroid Build Coastguard Worker * loads for the same UBO index.
1369*61046927SAndroid Build Coastguard Worker */
1370*61046927SAndroid Build Coastguard Worker bool seq_break = false;
1371*61046927SAndroid Build Coastguard Worker nir_instr *inst = &ref->instr;
1372*61046927SAndroid Build Coastguard Worker nir_instr *next_inst = NULL;
1373*61046927SAndroid Build Coastguard Worker while (true) {
1374*61046927SAndroid Build Coastguard Worker inst = next_inst ? next_inst : nir_instr_next(inst);
1375*61046927SAndroid Build Coastguard Worker if (!inst)
1376*61046927SAndroid Build Coastguard Worker break;
1377*61046927SAndroid Build Coastguard Worker
1378*61046927SAndroid Build Coastguard Worker next_inst = NULL;
1379*61046927SAndroid Build Coastguard Worker
1380*61046927SAndroid Build Coastguard Worker if (inst->type != nir_instr_type_intrinsic)
1381*61046927SAndroid Build Coastguard Worker continue;
1382*61046927SAndroid Build Coastguard Worker
1383*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr = nir_instr_as_intrinsic(inst);
1384*61046927SAndroid Build Coastguard Worker if (intr->intrinsic != nir_intrinsic_load_ubo)
1385*61046927SAndroid Build Coastguard Worker continue;
1386*61046927SAndroid Build Coastguard Worker
1387*61046927SAndroid Build Coastguard Worker /* We only produce unifa sequences for non-divergent loads */
1388*61046927SAndroid Build Coastguard Worker if (nir_src_is_divergent(intr->src[1]))
1389*61046927SAndroid Build Coastguard Worker continue;
1390*61046927SAndroid Build Coastguard Worker
1391*61046927SAndroid Build Coastguard Worker /* If there are any UBO loads that are not constant or that
1392*61046927SAndroid Build Coastguard Worker * use a different UBO index in between the reference load and
1393*61046927SAndroid Build Coastguard Worker * any other constant load for the same index, they would break
1394*61046927SAndroid Build Coastguard Worker * the unifa sequence. We will flag that so we can then move
1395*61046927SAndroid Build Coastguard Worker * all constant UBO loads for the reference index before these
1396*61046927SAndroid Build Coastguard Worker * and not just the ones that are not ordered to avoid breaking
1397*61046927SAndroid Build Coastguard Worker * the sequence and reduce unifa writes.
1398*61046927SAndroid Build Coastguard Worker */
1399*61046927SAndroid Build Coastguard Worker if (!nir_src_is_const(intr->src[1])) {
1400*61046927SAndroid Build Coastguard Worker seq_break = true;
1401*61046927SAndroid Build Coastguard Worker continue;
1402*61046927SAndroid Build Coastguard Worker }
1403*61046927SAndroid Build Coastguard Worker uint32_t offset = nir_src_as_uint(intr->src[1]);
1404*61046927SAndroid Build Coastguard Worker
1405*61046927SAndroid Build Coastguard Worker assert(nir_src_is_const(intr->src[0]));
1406*61046927SAndroid Build Coastguard Worker uint32_t index = nir_src_as_uint(intr->src[0]);
1407*61046927SAndroid Build Coastguard Worker if (index != ref_index) {
1408*61046927SAndroid Build Coastguard Worker seq_break = true;
1409*61046927SAndroid Build Coastguard Worker continue;
1410*61046927SAndroid Build Coastguard Worker }
1411*61046927SAndroid Build Coastguard Worker
1412*61046927SAndroid Build Coastguard Worker /* Only move loads with an offset that is close enough to the
1413*61046927SAndroid Build Coastguard Worker * reference offset, since otherwise we would not be able to
1414*61046927SAndroid Build Coastguard Worker * skip the unifa write for them. See ntq_emit_load_ubo_unifa.
1415*61046927SAndroid Build Coastguard Worker */
1416*61046927SAndroid Build Coastguard Worker if (abs((int)(ref_offset - offset)) > MAX_UNIFA_SKIP_DISTANCE)
1417*61046927SAndroid Build Coastguard Worker continue;
1418*61046927SAndroid Build Coastguard Worker
1419*61046927SAndroid Build Coastguard Worker /* We will move this load if its offset is smaller than ref's
1420*61046927SAndroid Build Coastguard Worker * (in which case we will move it before ref) or if the offset
1421*61046927SAndroid Build Coastguard Worker * is larger than ref's but there are sequence breakers in
1422*61046927SAndroid Build Coastguard Worker * in between (in which case we will move it after ref and
1423*61046927SAndroid Build Coastguard Worker * before the sequence breakers).
1424*61046927SAndroid Build Coastguard Worker */
1425*61046927SAndroid Build Coastguard Worker if (!seq_break && offset >= ref_offset)
1426*61046927SAndroid Build Coastguard Worker continue;
1427*61046927SAndroid Build Coastguard Worker
1428*61046927SAndroid Build Coastguard Worker /* Find where exactly we want to move this load:
1429*61046927SAndroid Build Coastguard Worker *
1430*61046927SAndroid Build Coastguard Worker * If we are moving it before ref, we want to check any other
1431*61046927SAndroid Build Coastguard Worker * UBO loads we placed before ref and make sure we insert this
1432*61046927SAndroid Build Coastguard Worker * one properly ordered with them. Likewise, if we are moving
1433*61046927SAndroid Build Coastguard Worker * it after ref.
1434*61046927SAndroid Build Coastguard Worker */
1435*61046927SAndroid Build Coastguard Worker nir_instr *pos = ref_inst;
1436*61046927SAndroid Build Coastguard Worker nir_instr *tmp = pos;
1437*61046927SAndroid Build Coastguard Worker do {
1438*61046927SAndroid Build Coastguard Worker if (offset < ref_offset)
1439*61046927SAndroid Build Coastguard Worker tmp = nir_instr_prev(tmp);
1440*61046927SAndroid Build Coastguard Worker else
1441*61046927SAndroid Build Coastguard Worker tmp = nir_instr_next(tmp);
1442*61046927SAndroid Build Coastguard Worker
1443*61046927SAndroid Build Coastguard Worker if (!tmp || tmp == inst)
1444*61046927SAndroid Build Coastguard Worker break;
1445*61046927SAndroid Build Coastguard Worker
1446*61046927SAndroid Build Coastguard Worker /* Ignore non-unifa UBO loads */
1447*61046927SAndroid Build Coastguard Worker if (tmp->type != nir_instr_type_intrinsic)
1448*61046927SAndroid Build Coastguard Worker continue;
1449*61046927SAndroid Build Coastguard Worker
1450*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *tmp_intr =
1451*61046927SAndroid Build Coastguard Worker nir_instr_as_intrinsic(tmp);
1452*61046927SAndroid Build Coastguard Worker if (tmp_intr->intrinsic != nir_intrinsic_load_ubo)
1453*61046927SAndroid Build Coastguard Worker continue;
1454*61046927SAndroid Build Coastguard Worker
1455*61046927SAndroid Build Coastguard Worker if (nir_src_is_divergent(tmp_intr->src[1]))
1456*61046927SAndroid Build Coastguard Worker continue;
1457*61046927SAndroid Build Coastguard Worker
1458*61046927SAndroid Build Coastguard Worker /* Stop if we find a unifa UBO load that breaks the
1459*61046927SAndroid Build Coastguard Worker * sequence.
1460*61046927SAndroid Build Coastguard Worker */
1461*61046927SAndroid Build Coastguard Worker if (!nir_src_is_const(tmp_intr->src[1]))
1462*61046927SAndroid Build Coastguard Worker break;
1463*61046927SAndroid Build Coastguard Worker
1464*61046927SAndroid Build Coastguard Worker if (nir_src_as_uint(tmp_intr->src[0]) != index)
1465*61046927SAndroid Build Coastguard Worker break;
1466*61046927SAndroid Build Coastguard Worker
1467*61046927SAndroid Build Coastguard Worker uint32_t tmp_offset = nir_src_as_uint(tmp_intr->src[1]);
1468*61046927SAndroid Build Coastguard Worker if (offset < ref_offset) {
1469*61046927SAndroid Build Coastguard Worker if (tmp_offset < offset ||
1470*61046927SAndroid Build Coastguard Worker tmp_offset >= ref_offset) {
1471*61046927SAndroid Build Coastguard Worker break;
1472*61046927SAndroid Build Coastguard Worker } else {
1473*61046927SAndroid Build Coastguard Worker pos = tmp;
1474*61046927SAndroid Build Coastguard Worker }
1475*61046927SAndroid Build Coastguard Worker } else {
1476*61046927SAndroid Build Coastguard Worker if (tmp_offset > offset ||
1477*61046927SAndroid Build Coastguard Worker tmp_offset <= ref_offset) {
1478*61046927SAndroid Build Coastguard Worker break;
1479*61046927SAndroid Build Coastguard Worker } else {
1480*61046927SAndroid Build Coastguard Worker pos = tmp;
1481*61046927SAndroid Build Coastguard Worker }
1482*61046927SAndroid Build Coastguard Worker }
1483*61046927SAndroid Build Coastguard Worker } while (true);
1484*61046927SAndroid Build Coastguard Worker
1485*61046927SAndroid Build Coastguard Worker /* We can't move the UBO load before the instruction that
1486*61046927SAndroid Build Coastguard Worker * defines its constant offset. If that instruction is placed
1487*61046927SAndroid Build Coastguard Worker * in between the new location (pos) and the current location
1488*61046927SAndroid Build Coastguard Worker * of this load, we will have to move that instruction too.
1489*61046927SAndroid Build Coastguard Worker *
1490*61046927SAndroid Build Coastguard Worker * We don't care about the UBO index definition because that
1491*61046927SAndroid Build Coastguard Worker * is optimized to be reused by all UBO loads for the same
1492*61046927SAndroid Build Coastguard Worker * index and therefore is certain to be defined before the
1493*61046927SAndroid Build Coastguard Worker * first UBO load that uses it.
1494*61046927SAndroid Build Coastguard Worker */
1495*61046927SAndroid Build Coastguard Worker nir_instr *offset_inst = NULL;
1496*61046927SAndroid Build Coastguard Worker tmp = inst;
1497*61046927SAndroid Build Coastguard Worker while ((tmp = nir_instr_prev(tmp)) != NULL) {
1498*61046927SAndroid Build Coastguard Worker if (pos == tmp) {
1499*61046927SAndroid Build Coastguard Worker /* We reached the target location without
1500*61046927SAndroid Build Coastguard Worker * finding the instruction that defines the
1501*61046927SAndroid Build Coastguard Worker * offset, so that instruction must be before
1502*61046927SAndroid Build Coastguard Worker * the new position and we don't have to fix it.
1503*61046927SAndroid Build Coastguard Worker */
1504*61046927SAndroid Build Coastguard Worker break;
1505*61046927SAndroid Build Coastguard Worker }
1506*61046927SAndroid Build Coastguard Worker if (intr->src[1].ssa->parent_instr == tmp) {
1507*61046927SAndroid Build Coastguard Worker offset_inst = tmp;
1508*61046927SAndroid Build Coastguard Worker break;
1509*61046927SAndroid Build Coastguard Worker }
1510*61046927SAndroid Build Coastguard Worker }
1511*61046927SAndroid Build Coastguard Worker
1512*61046927SAndroid Build Coastguard Worker if (offset_inst) {
1513*61046927SAndroid Build Coastguard Worker exec_node_remove(&offset_inst->node);
1514*61046927SAndroid Build Coastguard Worker exec_node_insert_node_before(&pos->node,
1515*61046927SAndroid Build Coastguard Worker &offset_inst->node);
1516*61046927SAndroid Build Coastguard Worker }
1517*61046927SAndroid Build Coastguard Worker
1518*61046927SAndroid Build Coastguard Worker /* Since we are moving the instruction before its current
1519*61046927SAndroid Build Coastguard Worker * location, grab its successor before the move so that
1520*61046927SAndroid Build Coastguard Worker * we can continue the next iteration of the main loop from
1521*61046927SAndroid Build Coastguard Worker * that instruction.
1522*61046927SAndroid Build Coastguard Worker */
1523*61046927SAndroid Build Coastguard Worker next_inst = nir_instr_next(inst);
1524*61046927SAndroid Build Coastguard Worker
1525*61046927SAndroid Build Coastguard Worker /* Move this load to the selected location */
1526*61046927SAndroid Build Coastguard Worker exec_node_remove(&inst->node);
1527*61046927SAndroid Build Coastguard Worker if (offset < ref_offset)
1528*61046927SAndroid Build Coastguard Worker exec_node_insert_node_before(&pos->node, &inst->node);
1529*61046927SAndroid Build Coastguard Worker else
1530*61046927SAndroid Build Coastguard Worker exec_node_insert_after(&pos->node, &inst->node);
1531*61046927SAndroid Build Coastguard Worker
1532*61046927SAndroid Build Coastguard Worker progress = true;
1533*61046927SAndroid Build Coastguard Worker }
1534*61046927SAndroid Build Coastguard Worker
1535*61046927SAndroid Build Coastguard Worker return progress;
1536*61046927SAndroid Build Coastguard Worker }
1537*61046927SAndroid Build Coastguard Worker
1538*61046927SAndroid Build Coastguard Worker static bool
v3d_nir_sort_constant_ubo_loads_block(struct v3d_compile * c,nir_block * block)1539*61046927SAndroid Build Coastguard Worker v3d_nir_sort_constant_ubo_loads_block(struct v3d_compile *c,
1540*61046927SAndroid Build Coastguard Worker nir_block *block)
1541*61046927SAndroid Build Coastguard Worker {
1542*61046927SAndroid Build Coastguard Worker bool progress = false;
1543*61046927SAndroid Build Coastguard Worker bool local_progress;
1544*61046927SAndroid Build Coastguard Worker do {
1545*61046927SAndroid Build Coastguard Worker local_progress = false;
1546*61046927SAndroid Build Coastguard Worker nir_foreach_instr_safe(inst, block) {
1547*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr =
1548*61046927SAndroid Build Coastguard Worker nir_instr_as_constant_ubo_load(inst);
1549*61046927SAndroid Build Coastguard Worker if (intr) {
1550*61046927SAndroid Build Coastguard Worker local_progress |=
1551*61046927SAndroid Build Coastguard Worker v3d_nir_sort_constant_ubo_load(block, intr);
1552*61046927SAndroid Build Coastguard Worker }
1553*61046927SAndroid Build Coastguard Worker }
1554*61046927SAndroid Build Coastguard Worker progress |= local_progress;
1555*61046927SAndroid Build Coastguard Worker } while (local_progress);
1556*61046927SAndroid Build Coastguard Worker
1557*61046927SAndroid Build Coastguard Worker return progress;
1558*61046927SAndroid Build Coastguard Worker }
1559*61046927SAndroid Build Coastguard Worker
1560*61046927SAndroid Build Coastguard Worker /**
1561*61046927SAndroid Build Coastguard Worker * Sorts constant UBO loads in each block by offset to maximize chances of
1562*61046927SAndroid Build Coastguard Worker * skipping unifa writes when converting to VIR. This can increase register
1563*61046927SAndroid Build Coastguard Worker * pressure.
1564*61046927SAndroid Build Coastguard Worker */
1565*61046927SAndroid Build Coastguard Worker static bool
v3d_nir_sort_constant_ubo_loads(nir_shader * s,struct v3d_compile * c)1566*61046927SAndroid Build Coastguard Worker v3d_nir_sort_constant_ubo_loads(nir_shader *s, struct v3d_compile *c)
1567*61046927SAndroid Build Coastguard Worker {
1568*61046927SAndroid Build Coastguard Worker nir_foreach_function_impl(impl, s) {
1569*61046927SAndroid Build Coastguard Worker nir_foreach_block(block, impl) {
1570*61046927SAndroid Build Coastguard Worker c->sorted_any_ubo_loads |=
1571*61046927SAndroid Build Coastguard Worker v3d_nir_sort_constant_ubo_loads_block(c, block);
1572*61046927SAndroid Build Coastguard Worker }
1573*61046927SAndroid Build Coastguard Worker nir_metadata_preserve(impl,
1574*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow);
1575*61046927SAndroid Build Coastguard Worker }
1576*61046927SAndroid Build Coastguard Worker return c->sorted_any_ubo_loads;
1577*61046927SAndroid Build Coastguard Worker }
1578*61046927SAndroid Build Coastguard Worker
1579*61046927SAndroid Build Coastguard Worker static void
lower_load_num_subgroups(struct v3d_compile * c,nir_builder * b,nir_intrinsic_instr * intr)1580*61046927SAndroid Build Coastguard Worker lower_load_num_subgroups(struct v3d_compile *c,
1581*61046927SAndroid Build Coastguard Worker nir_builder *b,
1582*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr)
1583*61046927SAndroid Build Coastguard Worker {
1584*61046927SAndroid Build Coastguard Worker assert(c->s->info.stage == MESA_SHADER_COMPUTE);
1585*61046927SAndroid Build Coastguard Worker assert(intr->intrinsic == nir_intrinsic_load_num_subgroups);
1586*61046927SAndroid Build Coastguard Worker
1587*61046927SAndroid Build Coastguard Worker b->cursor = nir_after_instr(&intr->instr);
1588*61046927SAndroid Build Coastguard Worker uint32_t num_subgroups =
1589*61046927SAndroid Build Coastguard Worker DIV_ROUND_UP(c->s->info.workgroup_size[0] *
1590*61046927SAndroid Build Coastguard Worker c->s->info.workgroup_size[1] *
1591*61046927SAndroid Build Coastguard Worker c->s->info.workgroup_size[2], V3D_CHANNELS);
1592*61046927SAndroid Build Coastguard Worker nir_def *result = nir_imm_int(b, num_subgroups);
1593*61046927SAndroid Build Coastguard Worker nir_def_replace(&intr->def, result);
1594*61046927SAndroid Build Coastguard Worker }
1595*61046927SAndroid Build Coastguard Worker
1596*61046927SAndroid Build Coastguard Worker static bool
lower_subgroup_intrinsics(struct v3d_compile * c,nir_block * block,nir_builder * b)1597*61046927SAndroid Build Coastguard Worker lower_subgroup_intrinsics(struct v3d_compile *c,
1598*61046927SAndroid Build Coastguard Worker nir_block *block, nir_builder *b)
1599*61046927SAndroid Build Coastguard Worker {
1600*61046927SAndroid Build Coastguard Worker bool progress = false;
1601*61046927SAndroid Build Coastguard Worker nir_foreach_instr_safe(inst, block) {
1602*61046927SAndroid Build Coastguard Worker if (inst->type != nir_instr_type_intrinsic)
1603*61046927SAndroid Build Coastguard Worker continue;;
1604*61046927SAndroid Build Coastguard Worker
1605*61046927SAndroid Build Coastguard Worker nir_intrinsic_instr *intr =
1606*61046927SAndroid Build Coastguard Worker nir_instr_as_intrinsic(inst);
1607*61046927SAndroid Build Coastguard Worker if (!intr)
1608*61046927SAndroid Build Coastguard Worker continue;
1609*61046927SAndroid Build Coastguard Worker
1610*61046927SAndroid Build Coastguard Worker switch (intr->intrinsic) {
1611*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_num_subgroups:
1612*61046927SAndroid Build Coastguard Worker lower_load_num_subgroups(c, b, intr);
1613*61046927SAndroid Build Coastguard Worker progress = true;
1614*61046927SAndroid Build Coastguard Worker FALLTHROUGH;
1615*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_id:
1616*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_size:
1617*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_invocation:
1618*61046927SAndroid Build Coastguard Worker case nir_intrinsic_elect:
1619*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot:
1620*61046927SAndroid Build Coastguard Worker case nir_intrinsic_inverse_ballot:
1621*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot_bitfield_extract:
1622*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot_bit_count_reduce:
1623*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot_find_lsb:
1624*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot_find_msb:
1625*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot_bit_count_exclusive:
1626*61046927SAndroid Build Coastguard Worker case nir_intrinsic_ballot_bit_count_inclusive:
1627*61046927SAndroid Build Coastguard Worker case nir_intrinsic_reduce:
1628*61046927SAndroid Build Coastguard Worker case nir_intrinsic_inclusive_scan:
1629*61046927SAndroid Build Coastguard Worker case nir_intrinsic_exclusive_scan:
1630*61046927SAndroid Build Coastguard Worker case nir_intrinsic_read_invocation:
1631*61046927SAndroid Build Coastguard Worker case nir_intrinsic_read_first_invocation:
1632*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_eq_mask:
1633*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_ge_mask:
1634*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_gt_mask:
1635*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_le_mask:
1636*61046927SAndroid Build Coastguard Worker case nir_intrinsic_load_subgroup_lt_mask:
1637*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shuffle:
1638*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shuffle_xor:
1639*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shuffle_up:
1640*61046927SAndroid Build Coastguard Worker case nir_intrinsic_shuffle_down:
1641*61046927SAndroid Build Coastguard Worker case nir_intrinsic_vote_all:
1642*61046927SAndroid Build Coastguard Worker case nir_intrinsic_vote_any:
1643*61046927SAndroid Build Coastguard Worker case nir_intrinsic_vote_feq:
1644*61046927SAndroid Build Coastguard Worker case nir_intrinsic_vote_ieq:
1645*61046927SAndroid Build Coastguard Worker case nir_intrinsic_quad_broadcast:
1646*61046927SAndroid Build Coastguard Worker case nir_intrinsic_quad_swap_horizontal:
1647*61046927SAndroid Build Coastguard Worker case nir_intrinsic_quad_swap_vertical:
1648*61046927SAndroid Build Coastguard Worker case nir_intrinsic_quad_swap_diagonal:
1649*61046927SAndroid Build Coastguard Worker c->has_subgroups = true;
1650*61046927SAndroid Build Coastguard Worker break;
1651*61046927SAndroid Build Coastguard Worker default:
1652*61046927SAndroid Build Coastguard Worker break;
1653*61046927SAndroid Build Coastguard Worker }
1654*61046927SAndroid Build Coastguard Worker }
1655*61046927SAndroid Build Coastguard Worker
1656*61046927SAndroid Build Coastguard Worker return progress;
1657*61046927SAndroid Build Coastguard Worker }
1658*61046927SAndroid Build Coastguard Worker
1659*61046927SAndroid Build Coastguard Worker static bool
v3d_nir_lower_subgroup_intrinsics(nir_shader * s,struct v3d_compile * c)1660*61046927SAndroid Build Coastguard Worker v3d_nir_lower_subgroup_intrinsics(nir_shader *s, struct v3d_compile *c)
1661*61046927SAndroid Build Coastguard Worker {
1662*61046927SAndroid Build Coastguard Worker bool progress = false;
1663*61046927SAndroid Build Coastguard Worker nir_foreach_function_impl(impl, s) {
1664*61046927SAndroid Build Coastguard Worker nir_builder b = nir_builder_create(impl);
1665*61046927SAndroid Build Coastguard Worker
1666*61046927SAndroid Build Coastguard Worker nir_foreach_block(block, impl)
1667*61046927SAndroid Build Coastguard Worker progress |= lower_subgroup_intrinsics(c, block, &b);
1668*61046927SAndroid Build Coastguard Worker
1669*61046927SAndroid Build Coastguard Worker nir_metadata_preserve(impl,
1670*61046927SAndroid Build Coastguard Worker nir_metadata_control_flow);
1671*61046927SAndroid Build Coastguard Worker }
1672*61046927SAndroid Build Coastguard Worker return progress;
1673*61046927SAndroid Build Coastguard Worker }
1674*61046927SAndroid Build Coastguard Worker
1675*61046927SAndroid Build Coastguard Worker static void
v3d_attempt_compile(struct v3d_compile * c)1676*61046927SAndroid Build Coastguard Worker v3d_attempt_compile(struct v3d_compile *c)
1677*61046927SAndroid Build Coastguard Worker {
1678*61046927SAndroid Build Coastguard Worker switch (c->s->info.stage) {
1679*61046927SAndroid Build Coastguard Worker case MESA_SHADER_VERTEX:
1680*61046927SAndroid Build Coastguard Worker c->vs_key = (struct v3d_vs_key *) c->key;
1681*61046927SAndroid Build Coastguard Worker break;
1682*61046927SAndroid Build Coastguard Worker case MESA_SHADER_GEOMETRY:
1683*61046927SAndroid Build Coastguard Worker c->gs_key = (struct v3d_gs_key *) c->key;
1684*61046927SAndroid Build Coastguard Worker break;
1685*61046927SAndroid Build Coastguard Worker case MESA_SHADER_FRAGMENT:
1686*61046927SAndroid Build Coastguard Worker c->fs_key = (struct v3d_fs_key *) c->key;
1687*61046927SAndroid Build Coastguard Worker break;
1688*61046927SAndroid Build Coastguard Worker case MESA_SHADER_COMPUTE:
1689*61046927SAndroid Build Coastguard Worker break;
1690*61046927SAndroid Build Coastguard Worker default:
1691*61046927SAndroid Build Coastguard Worker unreachable("unsupported shader stage");
1692*61046927SAndroid Build Coastguard Worker }
1693*61046927SAndroid Build Coastguard Worker
1694*61046927SAndroid Build Coastguard Worker switch (c->s->info.stage) {
1695*61046927SAndroid Build Coastguard Worker case MESA_SHADER_VERTEX:
1696*61046927SAndroid Build Coastguard Worker v3d_nir_lower_vs_early(c);
1697*61046927SAndroid Build Coastguard Worker break;
1698*61046927SAndroid Build Coastguard Worker case MESA_SHADER_GEOMETRY:
1699*61046927SAndroid Build Coastguard Worker v3d_nir_lower_gs_early(c);
1700*61046927SAndroid Build Coastguard Worker break;
1701*61046927SAndroid Build Coastguard Worker case MESA_SHADER_FRAGMENT:
1702*61046927SAndroid Build Coastguard Worker v3d_nir_lower_fs_early(c);
1703*61046927SAndroid Build Coastguard Worker break;
1704*61046927SAndroid Build Coastguard Worker default:
1705*61046927SAndroid Build Coastguard Worker break;
1706*61046927SAndroid Build Coastguard Worker }
1707*61046927SAndroid Build Coastguard Worker
1708*61046927SAndroid Build Coastguard Worker v3d_lower_nir(c);
1709*61046927SAndroid Build Coastguard Worker
1710*61046927SAndroid Build Coastguard Worker switch (c->s->info.stage) {
1711*61046927SAndroid Build Coastguard Worker case MESA_SHADER_VERTEX:
1712*61046927SAndroid Build Coastguard Worker v3d_nir_lower_vs_late(c);
1713*61046927SAndroid Build Coastguard Worker break;
1714*61046927SAndroid Build Coastguard Worker case MESA_SHADER_GEOMETRY:
1715*61046927SAndroid Build Coastguard Worker v3d_nir_lower_gs_late(c);
1716*61046927SAndroid Build Coastguard Worker break;
1717*61046927SAndroid Build Coastguard Worker case MESA_SHADER_FRAGMENT:
1718*61046927SAndroid Build Coastguard Worker v3d_nir_lower_fs_late(c);
1719*61046927SAndroid Build Coastguard Worker break;
1720*61046927SAndroid Build Coastguard Worker default:
1721*61046927SAndroid Build Coastguard Worker break;
1722*61046927SAndroid Build Coastguard Worker }
1723*61046927SAndroid Build Coastguard Worker
1724*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_io, c);
1725*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_txf_ms);
1726*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_image_load_store, c);
1727*61046927SAndroid Build Coastguard Worker
1728*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_idiv_const, 8);
1729*61046927SAndroid Build Coastguard Worker nir_lower_idiv_options idiv_options = {
1730*61046927SAndroid Build Coastguard Worker .allow_fp16 = true,
1731*61046927SAndroid Build Coastguard Worker };
1732*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_idiv, &idiv_options);
1733*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_alu);
1734*61046927SAndroid Build Coastguard Worker
1735*61046927SAndroid Build Coastguard Worker if (c->key->robust_uniform_access || c->key->robust_storage_access ||
1736*61046927SAndroid Build Coastguard Worker c->key->robust_image_access) {
1737*61046927SAndroid Build Coastguard Worker /* nir_lower_robust_access assumes constant buffer
1738*61046927SAndroid Build Coastguard Worker * indices on ubo/ssbo intrinsics so run copy propagation and
1739*61046927SAndroid Build Coastguard Worker * constant folding passes before we run the lowering to warrant
1740*61046927SAndroid Build Coastguard Worker * this. We also want to run the lowering before v3d_optimize to
1741*61046927SAndroid Build Coastguard Worker * clean-up redundant get_buffer_size calls produced in the pass.
1742*61046927SAndroid Build Coastguard Worker */
1743*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_copy_prop);
1744*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_constant_folding);
1745*61046927SAndroid Build Coastguard Worker
1746*61046927SAndroid Build Coastguard Worker nir_lower_robust_access_options opts = {
1747*61046927SAndroid Build Coastguard Worker .lower_image = c->key->robust_image_access,
1748*61046927SAndroid Build Coastguard Worker .lower_ssbo = c->key->robust_storage_access,
1749*61046927SAndroid Build Coastguard Worker .lower_ubo = c->key->robust_uniform_access,
1750*61046927SAndroid Build Coastguard Worker };
1751*61046927SAndroid Build Coastguard Worker
1752*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_robust_access, &opts);
1753*61046927SAndroid Build Coastguard Worker }
1754*61046927SAndroid Build Coastguard Worker
1755*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
1756*61046927SAndroid Build Coastguard Worker nir_var_function_temp,
1757*61046927SAndroid Build Coastguard Worker 0,
1758*61046927SAndroid Build Coastguard Worker glsl_get_natural_size_align_bytes,
1759*61046927SAndroid Build Coastguard Worker glsl_get_natural_size_align_bytes);
1760*61046927SAndroid Build Coastguard Worker
1761*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
1762*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
1763*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_load_store_bitsize);
1764*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_scratch);
1765*61046927SAndroid Build Coastguard Worker
1766*61046927SAndroid Build Coastguard Worker /* needs to run after load_store_bitsize */
1767*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_pack);
1768*61046927SAndroid Build Coastguard Worker
1769*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);
1770*61046927SAndroid Build Coastguard Worker
1771*61046927SAndroid Build Coastguard Worker const nir_lower_subgroups_options subgroup_opts = {
1772*61046927SAndroid Build Coastguard Worker .subgroup_size = V3D_CHANNELS,
1773*61046927SAndroid Build Coastguard Worker .ballot_components = 1,
1774*61046927SAndroid Build Coastguard Worker .ballot_bit_size = 32,
1775*61046927SAndroid Build Coastguard Worker .lower_to_scalar = true,
1776*61046927SAndroid Build Coastguard Worker .lower_inverse_ballot = true,
1777*61046927SAndroid Build Coastguard Worker .lower_subgroup_masks = true,
1778*61046927SAndroid Build Coastguard Worker .lower_relative_shuffle = true,
1779*61046927SAndroid Build Coastguard Worker .lower_quad = true,
1780*61046927SAndroid Build Coastguard Worker };
1781*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_subgroups, &subgroup_opts);
1782*61046927SAndroid Build Coastguard Worker
1783*61046927SAndroid Build Coastguard Worker v3d_optimize_nir(c, c->s);
1784*61046927SAndroid Build Coastguard Worker
1785*61046927SAndroid Build Coastguard Worker /* Do late algebraic optimization to turn add(a, neg(b)) back into
1786*61046927SAndroid Build Coastguard Worker * subs, then the mandatory cleanup after algebraic. Note that it may
1787*61046927SAndroid Build Coastguard Worker * produce fnegs, and if so then we need to keep running to squash
1788*61046927SAndroid Build Coastguard Worker * fneg(fneg(a)).
1789*61046927SAndroid Build Coastguard Worker */
1790*61046927SAndroid Build Coastguard Worker bool more_late_algebraic = true;
1791*61046927SAndroid Build Coastguard Worker while (more_late_algebraic) {
1792*61046927SAndroid Build Coastguard Worker more_late_algebraic = false;
1793*61046927SAndroid Build Coastguard Worker NIR_PASS(more_late_algebraic, c->s, nir_opt_algebraic_late);
1794*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_constant_folding);
1795*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_copy_prop);
1796*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_dce);
1797*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_cse);
1798*61046927SAndroid Build Coastguard Worker }
1799*61046927SAndroid Build Coastguard Worker
1800*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_lower_bool_to_int32);
1801*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_convert_to_lcssa, true, true);
1802*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_divergence_analysis);
1803*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_convert_from_ssa, true);
1804*61046927SAndroid Build Coastguard Worker
1805*61046927SAndroid Build Coastguard Worker struct nir_schedule_options schedule_options = {
1806*61046927SAndroid Build Coastguard Worker /* Schedule for about half our register space, to enable more
1807*61046927SAndroid Build Coastguard Worker * shaders to hit 4 threads.
1808*61046927SAndroid Build Coastguard Worker */
1809*61046927SAndroid Build Coastguard Worker .threshold = c->threads == 4 ? 24 : 48,
1810*61046927SAndroid Build Coastguard Worker
1811*61046927SAndroid Build Coastguard Worker /* Vertex shaders share the same memory for inputs and outputs,
1812*61046927SAndroid Build Coastguard Worker * fragment and geometry shaders do not.
1813*61046927SAndroid Build Coastguard Worker */
1814*61046927SAndroid Build Coastguard Worker .stages_with_shared_io_memory =
1815*61046927SAndroid Build Coastguard Worker (((1 << MESA_ALL_SHADER_STAGES) - 1) &
1816*61046927SAndroid Build Coastguard Worker ~((1 << MESA_SHADER_FRAGMENT) |
1817*61046927SAndroid Build Coastguard Worker (1 << MESA_SHADER_GEOMETRY))),
1818*61046927SAndroid Build Coastguard Worker
1819*61046927SAndroid Build Coastguard Worker .fallback = c->fallback_scheduler,
1820*61046927SAndroid Build Coastguard Worker
1821*61046927SAndroid Build Coastguard Worker .intrinsic_cb = v3d_intrinsic_dependency_cb,
1822*61046927SAndroid Build Coastguard Worker .intrinsic_cb_data = c,
1823*61046927SAndroid Build Coastguard Worker
1824*61046927SAndroid Build Coastguard Worker .instr_delay_cb = v3d_instr_delay_cb,
1825*61046927SAndroid Build Coastguard Worker .instr_delay_cb_data = c,
1826*61046927SAndroid Build Coastguard Worker };
1827*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_schedule, &schedule_options);
1828*61046927SAndroid Build Coastguard Worker
1829*61046927SAndroid Build Coastguard Worker if (!c->disable_constant_ubo_load_sorting)
1830*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, v3d_nir_sort_constant_ubo_loads, c);
1831*61046927SAndroid Build Coastguard Worker
1832*61046927SAndroid Build Coastguard Worker const nir_move_options buffer_opts = c->move_buffer_loads ?
1833*61046927SAndroid Build Coastguard Worker (nir_move_load_ubo | nir_move_load_ssbo) : 0;
1834*61046927SAndroid Build Coastguard Worker NIR_PASS(_, c->s, nir_opt_move, nir_move_load_uniform |
1835*61046927SAndroid Build Coastguard Worker nir_move_const_undef |
1836*61046927SAndroid Build Coastguard Worker buffer_opts);
1837*61046927SAndroid Build Coastguard Worker
1838*61046927SAndroid Build Coastguard Worker NIR_PASS_V(c->s, nir_trivialize_registers);
1839*61046927SAndroid Build Coastguard Worker
1840*61046927SAndroid Build Coastguard Worker v3d_nir_to_vir(c);
1841*61046927SAndroid Build Coastguard Worker }
1842*61046927SAndroid Build Coastguard Worker
1843*61046927SAndroid Build Coastguard Worker uint32_t
v3d_prog_data_size(gl_shader_stage stage)1844*61046927SAndroid Build Coastguard Worker v3d_prog_data_size(gl_shader_stage stage)
1845*61046927SAndroid Build Coastguard Worker {
1846*61046927SAndroid Build Coastguard Worker static const int prog_data_size[] = {
1847*61046927SAndroid Build Coastguard Worker [MESA_SHADER_VERTEX] = sizeof(struct v3d_vs_prog_data),
1848*61046927SAndroid Build Coastguard Worker [MESA_SHADER_GEOMETRY] = sizeof(struct v3d_gs_prog_data),
1849*61046927SAndroid Build Coastguard Worker [MESA_SHADER_FRAGMENT] = sizeof(struct v3d_fs_prog_data),
1850*61046927SAndroid Build Coastguard Worker [MESA_SHADER_COMPUTE] = sizeof(struct v3d_compute_prog_data),
1851*61046927SAndroid Build Coastguard Worker };
1852*61046927SAndroid Build Coastguard Worker
1853*61046927SAndroid Build Coastguard Worker assert(stage >= 0 &&
1854*61046927SAndroid Build Coastguard Worker stage < ARRAY_SIZE(prog_data_size) &&
1855*61046927SAndroid Build Coastguard Worker prog_data_size[stage]);
1856*61046927SAndroid Build Coastguard Worker
1857*61046927SAndroid Build Coastguard Worker return prog_data_size[stage];
1858*61046927SAndroid Build Coastguard Worker }
1859*61046927SAndroid Build Coastguard Worker
v3d_shaderdb_dump(struct v3d_compile * c,char ** shaderdb_str)1860*61046927SAndroid Build Coastguard Worker int v3d_shaderdb_dump(struct v3d_compile *c,
1861*61046927SAndroid Build Coastguard Worker char **shaderdb_str)
1862*61046927SAndroid Build Coastguard Worker {
1863*61046927SAndroid Build Coastguard Worker if (c == NULL || c->compilation_result != V3D_COMPILATION_SUCCEEDED)
1864*61046927SAndroid Build Coastguard Worker return -1;
1865*61046927SAndroid Build Coastguard Worker
1866*61046927SAndroid Build Coastguard Worker return asprintf(shaderdb_str,
1867*61046927SAndroid Build Coastguard Worker "%s shader: %d inst, %d threads, %d loops, "
1868*61046927SAndroid Build Coastguard Worker "%d uniforms, %d max-temps, %d:%d spills:fills, "
1869*61046927SAndroid Build Coastguard Worker "%d sfu-stalls, %d inst-and-stalls, %d nops",
1870*61046927SAndroid Build Coastguard Worker vir_get_stage_name(c),
1871*61046927SAndroid Build Coastguard Worker c->qpu_inst_count,
1872*61046927SAndroid Build Coastguard Worker c->threads,
1873*61046927SAndroid Build Coastguard Worker c->loops,
1874*61046927SAndroid Build Coastguard Worker c->num_uniforms,
1875*61046927SAndroid Build Coastguard Worker vir_get_max_temps(c),
1876*61046927SAndroid Build Coastguard Worker c->spills,
1877*61046927SAndroid Build Coastguard Worker c->fills,
1878*61046927SAndroid Build Coastguard Worker c->qpu_inst_stalled_count,
1879*61046927SAndroid Build Coastguard Worker c->qpu_inst_count + c->qpu_inst_stalled_count,
1880*61046927SAndroid Build Coastguard Worker c->nop_count);
1881*61046927SAndroid Build Coastguard Worker }
1882*61046927SAndroid Build Coastguard Worker
1883*61046927SAndroid Build Coastguard Worker /* This is a list of incremental changes to the compilation strategy
1884*61046927SAndroid Build Coastguard Worker * that will be used to try to compile the shader successfully. The
1885*61046927SAndroid Build Coastguard Worker * default strategy is to enable all optimizations which will have
1886*61046927SAndroid Build Coastguard Worker * the highest register pressure but is expected to produce most
1887*61046927SAndroid Build Coastguard Worker * optimal code. Following strategies incrementally disable specific
1888*61046927SAndroid Build Coastguard Worker * optimizations that are known to contribute to register pressure
1889*61046927SAndroid Build Coastguard Worker * in order to be able to compile the shader successfully while meeting
1890*61046927SAndroid Build Coastguard Worker * thread count requirements.
1891*61046927SAndroid Build Coastguard Worker *
1892*61046927SAndroid Build Coastguard Worker * V3D 4.1+ has a min thread count of 2, but we can use 1 here to also
1893*61046927SAndroid Build Coastguard Worker * cover previous hardware as well (meaning that we are not limiting
1894*61046927SAndroid Build Coastguard Worker * register allocation to any particular thread count). This is fine
1895*61046927SAndroid Build Coastguard Worker * because v3d_nir_to_vir will cap this to the actual minimum.
1896*61046927SAndroid Build Coastguard Worker */
1897*61046927SAndroid Build Coastguard Worker static const struct v3d_compiler_strategy strategies[] = {
1898*61046927SAndroid Build Coastguard Worker /*0*/ { "default", 4, 4, false, false, false, false, false, false, 0 },
1899*61046927SAndroid Build Coastguard Worker /*1*/ { "disable general TMU sched", 4, 4, true, false, false, false, false, false, 0 },
1900*61046927SAndroid Build Coastguard Worker /*2*/ { "disable gcm", 4, 4, true, true, false, false, false, false, 0 },
1901*61046927SAndroid Build Coastguard Worker /*3*/ { "disable loop unrolling", 4, 4, true, true, true, false, false, false, 0 },
1902*61046927SAndroid Build Coastguard Worker /*4*/ { "disable UBO load sorting", 4, 4, true, true, true, true, false, false, 0 },
1903*61046927SAndroid Build Coastguard Worker /*5*/ { "disable TMU pipelining", 4, 4, true, true, true, true, false, true, 0 },
1904*61046927SAndroid Build Coastguard Worker /*6*/ { "lower thread count", 2, 1, false, false, false, false, false, false, -1 },
1905*61046927SAndroid Build Coastguard Worker /*7*/ { "disable general TMU sched (2t)", 2, 1, true, false, false, false, false, false, -1 },
1906*61046927SAndroid Build Coastguard Worker /*8*/ { "disable gcm (2t)", 2, 1, true, true, false, false, false, false, -1 },
1907*61046927SAndroid Build Coastguard Worker /*9*/ { "disable loop unrolling (2t)", 2, 1, true, true, true, false, false, false, -1 },
1908*61046927SAndroid Build Coastguard Worker /*10*/ { "Move buffer loads (2t)", 2, 1, true, true, true, true, true, false, -1 },
1909*61046927SAndroid Build Coastguard Worker /*11*/ { "disable TMU pipelining (2t)", 2, 1, true, true, true, true, true, true, -1 },
1910*61046927SAndroid Build Coastguard Worker /*12*/ { "fallback scheduler", 2, 1, true, true, true, true, true, true, -1 }
1911*61046927SAndroid Build Coastguard Worker };
1912*61046927SAndroid Build Coastguard Worker
1913*61046927SAndroid Build Coastguard Worker /**
1914*61046927SAndroid Build Coastguard Worker * If a particular optimization didn't make any progress during a compile
1915*61046927SAndroid Build Coastguard Worker * attempt disabling it alone won't allow us to compile the shader successfully,
1916*61046927SAndroid Build Coastguard Worker * since we'll end up with the same code. Detect these scenarios so we can
1917*61046927SAndroid Build Coastguard Worker * avoid wasting time with useless compiles. We should also consider if the
1918*61046927SAndroid Build Coastguard Worker * gy changes other aspects of the compilation process though, like
1919*61046927SAndroid Build Coastguard Worker * spilling, and not skip it in that case.
1920*61046927SAndroid Build Coastguard Worker */
1921*61046927SAndroid Build Coastguard Worker static bool
skip_compile_strategy(struct v3d_compile * c,uint32_t idx)1922*61046927SAndroid Build Coastguard Worker skip_compile_strategy(struct v3d_compile *c, uint32_t idx)
1923*61046927SAndroid Build Coastguard Worker {
1924*61046927SAndroid Build Coastguard Worker /* We decide if we can skip a strategy based on the optimizations that
1925*61046927SAndroid Build Coastguard Worker * were active in the previous strategy, so we should only be calling this
1926*61046927SAndroid Build Coastguard Worker * for strategies after the first.
1927*61046927SAndroid Build Coastguard Worker */
1928*61046927SAndroid Build Coastguard Worker assert(idx > 0);
1929*61046927SAndroid Build Coastguard Worker
1930*61046927SAndroid Build Coastguard Worker /* Don't skip a strategy that changes spilling behavior */
1931*61046927SAndroid Build Coastguard Worker if (strategies[idx].max_tmu_spills !=
1932*61046927SAndroid Build Coastguard Worker strategies[idx - 1].max_tmu_spills) {
1933*61046927SAndroid Build Coastguard Worker return false;
1934*61046927SAndroid Build Coastguard Worker }
1935*61046927SAndroid Build Coastguard Worker
1936*61046927SAndroid Build Coastguard Worker switch (idx) {
1937*61046927SAndroid Build Coastguard Worker /* General TMU sched.: skip if we didn't emit any TMU loads */
1938*61046927SAndroid Build Coastguard Worker case 1:
1939*61046927SAndroid Build Coastguard Worker case 7:
1940*61046927SAndroid Build Coastguard Worker return !c->has_general_tmu_load;
1941*61046927SAndroid Build Coastguard Worker /* Global code motion: skip if nir_opt_gcm didn't make any progress */
1942*61046927SAndroid Build Coastguard Worker case 2:
1943*61046927SAndroid Build Coastguard Worker case 8:
1944*61046927SAndroid Build Coastguard Worker return !c->gcm_progress;
1945*61046927SAndroid Build Coastguard Worker /* Loop unrolling: skip if we didn't unroll any loops */
1946*61046927SAndroid Build Coastguard Worker case 3:
1947*61046927SAndroid Build Coastguard Worker case 9:
1948*61046927SAndroid Build Coastguard Worker return !c->unrolled_any_loops;
1949*61046927SAndroid Build Coastguard Worker /* UBO load sorting: skip if we didn't sort any loads */
1950*61046927SAndroid Build Coastguard Worker case 4:
1951*61046927SAndroid Build Coastguard Worker return !c->sorted_any_ubo_loads;
1952*61046927SAndroid Build Coastguard Worker /* Move buffer loads: we assume any shader with difficult RA
1953*61046927SAndroid Build Coastguard Worker * most likely has UBO / SSBO loads so we never try to skip.
1954*61046927SAndroid Build Coastguard Worker * For now, we only try this for 2-thread compiles since it
1955*61046927SAndroid Build Coastguard Worker * is expected to impact instruction counts and latency.
1956*61046927SAndroid Build Coastguard Worker */
1957*61046927SAndroid Build Coastguard Worker case 10:
1958*61046927SAndroid Build Coastguard Worker assert(c->threads < 4);
1959*61046927SAndroid Build Coastguard Worker return false;
1960*61046927SAndroid Build Coastguard Worker /* TMU pipelining: skip if we didn't pipeline any TMU ops */
1961*61046927SAndroid Build Coastguard Worker case 5:
1962*61046927SAndroid Build Coastguard Worker case 11:
1963*61046927SAndroid Build Coastguard Worker return !c->pipelined_any_tmu;
1964*61046927SAndroid Build Coastguard Worker /* Lower thread count: skip if we already tried less that 4 threads */
1965*61046927SAndroid Build Coastguard Worker case 6:
1966*61046927SAndroid Build Coastguard Worker return c->threads < 4;
1967*61046927SAndroid Build Coastguard Worker default:
1968*61046927SAndroid Build Coastguard Worker return false;
1969*61046927SAndroid Build Coastguard Worker };
1970*61046927SAndroid Build Coastguard Worker }
1971*61046927SAndroid Build Coastguard Worker
1972*61046927SAndroid Build Coastguard Worker static inline void
set_best_compile(struct v3d_compile ** best,struct v3d_compile * c)1973*61046927SAndroid Build Coastguard Worker set_best_compile(struct v3d_compile **best, struct v3d_compile *c)
1974*61046927SAndroid Build Coastguard Worker {
1975*61046927SAndroid Build Coastguard Worker if (*best)
1976*61046927SAndroid Build Coastguard Worker vir_compile_destroy(*best);
1977*61046927SAndroid Build Coastguard Worker *best = c;
1978*61046927SAndroid Build Coastguard Worker }
1979*61046927SAndroid Build Coastguard Worker
v3d_compile(const struct v3d_compiler * compiler,struct v3d_key * key,struct v3d_prog_data ** out_prog_data,nir_shader * s,void (* debug_output)(const char * msg,void * debug_output_data),void * debug_output_data,int program_id,int variant_id,uint32_t * final_assembly_size)1980*61046927SAndroid Build Coastguard Worker uint64_t *v3d_compile(const struct v3d_compiler *compiler,
1981*61046927SAndroid Build Coastguard Worker struct v3d_key *key,
1982*61046927SAndroid Build Coastguard Worker struct v3d_prog_data **out_prog_data,
1983*61046927SAndroid Build Coastguard Worker nir_shader *s,
1984*61046927SAndroid Build Coastguard Worker void (*debug_output)(const char *msg,
1985*61046927SAndroid Build Coastguard Worker void *debug_output_data),
1986*61046927SAndroid Build Coastguard Worker void *debug_output_data,
1987*61046927SAndroid Build Coastguard Worker int program_id, int variant_id,
1988*61046927SAndroid Build Coastguard Worker uint32_t *final_assembly_size)
1989*61046927SAndroid Build Coastguard Worker {
1990*61046927SAndroid Build Coastguard Worker struct v3d_compile *c = NULL;
1991*61046927SAndroid Build Coastguard Worker
1992*61046927SAndroid Build Coastguard Worker uint32_t best_spill_fill_count = UINT32_MAX;
1993*61046927SAndroid Build Coastguard Worker struct v3d_compile *best_c = NULL;
1994*61046927SAndroid Build Coastguard Worker for (int32_t strat = 0; strat < ARRAY_SIZE(strategies); strat++) {
1995*61046927SAndroid Build Coastguard Worker /* Fallback strategy */
1996*61046927SAndroid Build Coastguard Worker if (strat > 0) {
1997*61046927SAndroid Build Coastguard Worker assert(c);
1998*61046927SAndroid Build Coastguard Worker if (skip_compile_strategy(c, strat))
1999*61046927SAndroid Build Coastguard Worker continue;
2000*61046927SAndroid Build Coastguard Worker
2001*61046927SAndroid Build Coastguard Worker char *debug_msg;
2002*61046927SAndroid Build Coastguard Worker int ret = asprintf(&debug_msg,
2003*61046927SAndroid Build Coastguard Worker "Falling back to strategy '%s' "
2004*61046927SAndroid Build Coastguard Worker "for %s prog %d/%d",
2005*61046927SAndroid Build Coastguard Worker strategies[strat].name,
2006*61046927SAndroid Build Coastguard Worker vir_get_stage_name(c),
2007*61046927SAndroid Build Coastguard Worker c->program_id, c->variant_id);
2008*61046927SAndroid Build Coastguard Worker
2009*61046927SAndroid Build Coastguard Worker if (ret >= 0) {
2010*61046927SAndroid Build Coastguard Worker if (V3D_DBG(PERF))
2011*61046927SAndroid Build Coastguard Worker fprintf(stderr, "%s\n", debug_msg);
2012*61046927SAndroid Build Coastguard Worker
2013*61046927SAndroid Build Coastguard Worker c->debug_output(debug_msg, c->debug_output_data);
2014*61046927SAndroid Build Coastguard Worker free(debug_msg);
2015*61046927SAndroid Build Coastguard Worker }
2016*61046927SAndroid Build Coastguard Worker
2017*61046927SAndroid Build Coastguard Worker if (c != best_c)
2018*61046927SAndroid Build Coastguard Worker vir_compile_destroy(c);
2019*61046927SAndroid Build Coastguard Worker }
2020*61046927SAndroid Build Coastguard Worker
2021*61046927SAndroid Build Coastguard Worker c = vir_compile_init(compiler, key, s,
2022*61046927SAndroid Build Coastguard Worker debug_output, debug_output_data,
2023*61046927SAndroid Build Coastguard Worker program_id, variant_id,
2024*61046927SAndroid Build Coastguard Worker strat, &strategies[strat],
2025*61046927SAndroid Build Coastguard Worker strat == ARRAY_SIZE(strategies) - 1);
2026*61046927SAndroid Build Coastguard Worker
2027*61046927SAndroid Build Coastguard Worker v3d_attempt_compile(c);
2028*61046927SAndroid Build Coastguard Worker
2029*61046927SAndroid Build Coastguard Worker /* Broken shader or driver bug */
2030*61046927SAndroid Build Coastguard Worker if (c->compilation_result == V3D_COMPILATION_FAILED)
2031*61046927SAndroid Build Coastguard Worker break;
2032*61046927SAndroid Build Coastguard Worker
2033*61046927SAndroid Build Coastguard Worker /* If we compiled without spills, choose this.
2034*61046927SAndroid Build Coastguard Worker * Otherwise if this is a 4-thread compile, choose this (these
2035*61046927SAndroid Build Coastguard Worker * have a very low cap on the allowed TMU spills so we assume
2036*61046927SAndroid Build Coastguard Worker * it will be better than a 2-thread compile without spills).
2037*61046927SAndroid Build Coastguard Worker * Otherwise, keep going while tracking the strategy with the
2038*61046927SAndroid Build Coastguard Worker * lowest spill count.
2039*61046927SAndroid Build Coastguard Worker */
2040*61046927SAndroid Build Coastguard Worker if (c->compilation_result == V3D_COMPILATION_SUCCEEDED) {
2041*61046927SAndroid Build Coastguard Worker if (c->spills == 0 ||
2042*61046927SAndroid Build Coastguard Worker strategies[strat].min_threads == 4 ||
2043*61046927SAndroid Build Coastguard Worker V3D_DBG(OPT_COMPILE_TIME)) {
2044*61046927SAndroid Build Coastguard Worker set_best_compile(&best_c, c);
2045*61046927SAndroid Build Coastguard Worker break;
2046*61046927SAndroid Build Coastguard Worker } else if (c->spills + c->fills <
2047*61046927SAndroid Build Coastguard Worker best_spill_fill_count) {
2048*61046927SAndroid Build Coastguard Worker set_best_compile(&best_c, c);
2049*61046927SAndroid Build Coastguard Worker best_spill_fill_count = c->spills + c->fills;
2050*61046927SAndroid Build Coastguard Worker }
2051*61046927SAndroid Build Coastguard Worker
2052*61046927SAndroid Build Coastguard Worker if (V3D_DBG(PERF)) {
2053*61046927SAndroid Build Coastguard Worker char *debug_msg;
2054*61046927SAndroid Build Coastguard Worker int ret = asprintf(&debug_msg,
2055*61046927SAndroid Build Coastguard Worker "Compiled %s prog %d/%d with %d "
2056*61046927SAndroid Build Coastguard Worker "spills and %d fills. Will try "
2057*61046927SAndroid Build Coastguard Worker "more strategies.",
2058*61046927SAndroid Build Coastguard Worker vir_get_stage_name(c),
2059*61046927SAndroid Build Coastguard Worker c->program_id, c->variant_id,
2060*61046927SAndroid Build Coastguard Worker c->spills, c->fills);
2061*61046927SAndroid Build Coastguard Worker if (ret >= 0) {
2062*61046927SAndroid Build Coastguard Worker fprintf(stderr, "%s\n", debug_msg);
2063*61046927SAndroid Build Coastguard Worker c->debug_output(debug_msg, c->debug_output_data);
2064*61046927SAndroid Build Coastguard Worker free(debug_msg);
2065*61046927SAndroid Build Coastguard Worker }
2066*61046927SAndroid Build Coastguard Worker }
2067*61046927SAndroid Build Coastguard Worker }
2068*61046927SAndroid Build Coastguard Worker
2069*61046927SAndroid Build Coastguard Worker /* Only try next streategy if we failed to register allocate
2070*61046927SAndroid Build Coastguard Worker * or we had to spill.
2071*61046927SAndroid Build Coastguard Worker */
2072*61046927SAndroid Build Coastguard Worker assert(c->compilation_result ==
2073*61046927SAndroid Build Coastguard Worker V3D_COMPILATION_FAILED_REGISTER_ALLOCATION ||
2074*61046927SAndroid Build Coastguard Worker c->spills > 0);
2075*61046927SAndroid Build Coastguard Worker }
2076*61046927SAndroid Build Coastguard Worker
2077*61046927SAndroid Build Coastguard Worker /* If the best strategy was not the last, choose that */
2078*61046927SAndroid Build Coastguard Worker if (best_c && c != best_c)
2079*61046927SAndroid Build Coastguard Worker set_best_compile(&c, best_c);
2080*61046927SAndroid Build Coastguard Worker
2081*61046927SAndroid Build Coastguard Worker if (V3D_DBG(PERF) &&
2082*61046927SAndroid Build Coastguard Worker c->compilation_result !=
2083*61046927SAndroid Build Coastguard Worker V3D_COMPILATION_FAILED_REGISTER_ALLOCATION &&
2084*61046927SAndroid Build Coastguard Worker c->spills > 0) {
2085*61046927SAndroid Build Coastguard Worker char *debug_msg;
2086*61046927SAndroid Build Coastguard Worker int ret = asprintf(&debug_msg,
2087*61046927SAndroid Build Coastguard Worker "Compiled %s prog %d/%d with %d "
2088*61046927SAndroid Build Coastguard Worker "spills and %d fills",
2089*61046927SAndroid Build Coastguard Worker vir_get_stage_name(c),
2090*61046927SAndroid Build Coastguard Worker c->program_id, c->variant_id,
2091*61046927SAndroid Build Coastguard Worker c->spills, c->fills);
2092*61046927SAndroid Build Coastguard Worker fprintf(stderr, "%s\n", debug_msg);
2093*61046927SAndroid Build Coastguard Worker
2094*61046927SAndroid Build Coastguard Worker if (ret >= 0) {
2095*61046927SAndroid Build Coastguard Worker c->debug_output(debug_msg, c->debug_output_data);
2096*61046927SAndroid Build Coastguard Worker free(debug_msg);
2097*61046927SAndroid Build Coastguard Worker }
2098*61046927SAndroid Build Coastguard Worker }
2099*61046927SAndroid Build Coastguard Worker
2100*61046927SAndroid Build Coastguard Worker if (c->compilation_result != V3D_COMPILATION_SUCCEEDED) {
2101*61046927SAndroid Build Coastguard Worker fprintf(stderr, "Failed to compile %s prog %d/%d "
2102*61046927SAndroid Build Coastguard Worker "with any strategy.\n",
2103*61046927SAndroid Build Coastguard Worker vir_get_stage_name(c), c->program_id, c->variant_id);
2104*61046927SAndroid Build Coastguard Worker
2105*61046927SAndroid Build Coastguard Worker vir_compile_destroy(c);
2106*61046927SAndroid Build Coastguard Worker return NULL;
2107*61046927SAndroid Build Coastguard Worker }
2108*61046927SAndroid Build Coastguard Worker
2109*61046927SAndroid Build Coastguard Worker struct v3d_prog_data *prog_data;
2110*61046927SAndroid Build Coastguard Worker
2111*61046927SAndroid Build Coastguard Worker prog_data = rzalloc_size(NULL, v3d_prog_data_size(c->s->info.stage));
2112*61046927SAndroid Build Coastguard Worker
2113*61046927SAndroid Build Coastguard Worker v3d_set_prog_data(c, prog_data);
2114*61046927SAndroid Build Coastguard Worker
2115*61046927SAndroid Build Coastguard Worker *out_prog_data = prog_data;
2116*61046927SAndroid Build Coastguard Worker
2117*61046927SAndroid Build Coastguard Worker char *shaderdb;
2118*61046927SAndroid Build Coastguard Worker int ret = v3d_shaderdb_dump(c, &shaderdb);
2119*61046927SAndroid Build Coastguard Worker if (ret >= 0) {
2120*61046927SAndroid Build Coastguard Worker if (V3D_DBG(SHADERDB))
2121*61046927SAndroid Build Coastguard Worker fprintf(stderr, "SHADER-DB-%s - %s\n", s->info.name, shaderdb);
2122*61046927SAndroid Build Coastguard Worker
2123*61046927SAndroid Build Coastguard Worker c->debug_output(shaderdb, c->debug_output_data);
2124*61046927SAndroid Build Coastguard Worker free(shaderdb);
2125*61046927SAndroid Build Coastguard Worker }
2126*61046927SAndroid Build Coastguard Worker
2127*61046927SAndroid Build Coastguard Worker return v3d_return_qpu_insts(c, final_assembly_size);
2128*61046927SAndroid Build Coastguard Worker }
2129*61046927SAndroid Build Coastguard Worker
2130*61046927SAndroid Build Coastguard Worker void
vir_remove_instruction(struct v3d_compile * c,struct qinst * qinst)2131*61046927SAndroid Build Coastguard Worker vir_remove_instruction(struct v3d_compile *c, struct qinst *qinst)
2132*61046927SAndroid Build Coastguard Worker {
2133*61046927SAndroid Build Coastguard Worker if (qinst->dst.file == QFILE_TEMP)
2134*61046927SAndroid Build Coastguard Worker c->defs[qinst->dst.index] = NULL;
2135*61046927SAndroid Build Coastguard Worker
2136*61046927SAndroid Build Coastguard Worker assert(&qinst->link != c->cursor.link);
2137*61046927SAndroid Build Coastguard Worker
2138*61046927SAndroid Build Coastguard Worker list_del(&qinst->link);
2139*61046927SAndroid Build Coastguard Worker free(qinst);
2140*61046927SAndroid Build Coastguard Worker
2141*61046927SAndroid Build Coastguard Worker c->live_intervals_valid = false;
2142*61046927SAndroid Build Coastguard Worker }
2143*61046927SAndroid Build Coastguard Worker
2144*61046927SAndroid Build Coastguard Worker struct qreg
vir_follow_movs(struct v3d_compile * c,struct qreg reg)2145*61046927SAndroid Build Coastguard Worker vir_follow_movs(struct v3d_compile *c, struct qreg reg)
2146*61046927SAndroid Build Coastguard Worker {
2147*61046927SAndroid Build Coastguard Worker /* XXX
2148*61046927SAndroid Build Coastguard Worker int pack = reg.pack;
2149*61046927SAndroid Build Coastguard Worker
2150*61046927SAndroid Build Coastguard Worker while (reg.file == QFILE_TEMP &&
2151*61046927SAndroid Build Coastguard Worker c->defs[reg.index] &&
2152*61046927SAndroid Build Coastguard Worker (c->defs[reg.index]->op == QOP_MOV ||
2153*61046927SAndroid Build Coastguard Worker c->defs[reg.index]->op == QOP_FMOV) &&
2154*61046927SAndroid Build Coastguard Worker !c->defs[reg.index]->dst.pack &&
2155*61046927SAndroid Build Coastguard Worker !c->defs[reg.index]->src[0].pack) {
2156*61046927SAndroid Build Coastguard Worker reg = c->defs[reg.index]->src[0];
2157*61046927SAndroid Build Coastguard Worker }
2158*61046927SAndroid Build Coastguard Worker
2159*61046927SAndroid Build Coastguard Worker reg.pack = pack;
2160*61046927SAndroid Build Coastguard Worker */
2161*61046927SAndroid Build Coastguard Worker return reg;
2162*61046927SAndroid Build Coastguard Worker }
2163*61046927SAndroid Build Coastguard Worker
2164*61046927SAndroid Build Coastguard Worker void
vir_compile_destroy(struct v3d_compile * c)2165*61046927SAndroid Build Coastguard Worker vir_compile_destroy(struct v3d_compile *c)
2166*61046927SAndroid Build Coastguard Worker {
2167*61046927SAndroid Build Coastguard Worker /* Defuse the assert that we aren't removing the cursor's instruction.
2168*61046927SAndroid Build Coastguard Worker */
2169*61046927SAndroid Build Coastguard Worker c->cursor.link = NULL;
2170*61046927SAndroid Build Coastguard Worker
2171*61046927SAndroid Build Coastguard Worker vir_for_each_block(block, c) {
2172*61046927SAndroid Build Coastguard Worker while (!list_is_empty(&block->instructions)) {
2173*61046927SAndroid Build Coastguard Worker struct qinst *qinst =
2174*61046927SAndroid Build Coastguard Worker list_first_entry(&block->instructions,
2175*61046927SAndroid Build Coastguard Worker struct qinst, link);
2176*61046927SAndroid Build Coastguard Worker vir_remove_instruction(c, qinst);
2177*61046927SAndroid Build Coastguard Worker }
2178*61046927SAndroid Build Coastguard Worker }
2179*61046927SAndroid Build Coastguard Worker
2180*61046927SAndroid Build Coastguard Worker ralloc_free(c);
2181*61046927SAndroid Build Coastguard Worker }
2182*61046927SAndroid Build Coastguard Worker
2183*61046927SAndroid Build Coastguard Worker uint32_t
vir_get_uniform_index(struct v3d_compile * c,enum quniform_contents contents,uint32_t data)2184*61046927SAndroid Build Coastguard Worker vir_get_uniform_index(struct v3d_compile *c,
2185*61046927SAndroid Build Coastguard Worker enum quniform_contents contents,
2186*61046927SAndroid Build Coastguard Worker uint32_t data)
2187*61046927SAndroid Build Coastguard Worker {
2188*61046927SAndroid Build Coastguard Worker for (int i = 0; i < c->num_uniforms; i++) {
2189*61046927SAndroid Build Coastguard Worker if (c->uniform_contents[i] == contents &&
2190*61046927SAndroid Build Coastguard Worker c->uniform_data[i] == data) {
2191*61046927SAndroid Build Coastguard Worker return i;
2192*61046927SAndroid Build Coastguard Worker }
2193*61046927SAndroid Build Coastguard Worker }
2194*61046927SAndroid Build Coastguard Worker
2195*61046927SAndroid Build Coastguard Worker uint32_t uniform = c->num_uniforms++;
2196*61046927SAndroid Build Coastguard Worker
2197*61046927SAndroid Build Coastguard Worker if (uniform >= c->uniform_array_size) {
2198*61046927SAndroid Build Coastguard Worker c->uniform_array_size = MAX2(MAX2(16, uniform + 1),
2199*61046927SAndroid Build Coastguard Worker c->uniform_array_size * 2);
2200*61046927SAndroid Build Coastguard Worker
2201*61046927SAndroid Build Coastguard Worker c->uniform_data = reralloc(c, c->uniform_data,
2202*61046927SAndroid Build Coastguard Worker uint32_t,
2203*61046927SAndroid Build Coastguard Worker c->uniform_array_size);
2204*61046927SAndroid Build Coastguard Worker c->uniform_contents = reralloc(c, c->uniform_contents,
2205*61046927SAndroid Build Coastguard Worker enum quniform_contents,
2206*61046927SAndroid Build Coastguard Worker c->uniform_array_size);
2207*61046927SAndroid Build Coastguard Worker }
2208*61046927SAndroid Build Coastguard Worker
2209*61046927SAndroid Build Coastguard Worker c->uniform_contents[uniform] = contents;
2210*61046927SAndroid Build Coastguard Worker c->uniform_data[uniform] = data;
2211*61046927SAndroid Build Coastguard Worker
2212*61046927SAndroid Build Coastguard Worker return uniform;
2213*61046927SAndroid Build Coastguard Worker }
2214*61046927SAndroid Build Coastguard Worker
2215*61046927SAndroid Build Coastguard Worker /* Looks back into the current block to find the ldunif that wrote the uniform
2216*61046927SAndroid Build Coastguard Worker * at the requested index. If it finds it, it returns true and writes the
2217*61046927SAndroid Build Coastguard Worker * destination register of the ldunif instruction to 'unif'.
2218*61046927SAndroid Build Coastguard Worker *
2219*61046927SAndroid Build Coastguard Worker * This can impact register pressure and end up leading to worse code, so we
2220*61046927SAndroid Build Coastguard Worker * limit the number of instructions we are willing to look back through to
2221*61046927SAndroid Build Coastguard Worker * strike a good balance.
2222*61046927SAndroid Build Coastguard Worker */
2223*61046927SAndroid Build Coastguard Worker static bool
try_opt_ldunif(struct v3d_compile * c,uint32_t index,struct qreg * unif)2224*61046927SAndroid Build Coastguard Worker try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif)
2225*61046927SAndroid Build Coastguard Worker {
2226*61046927SAndroid Build Coastguard Worker uint32_t count = 20;
2227*61046927SAndroid Build Coastguard Worker struct qinst *prev_inst = NULL;
2228*61046927SAndroid Build Coastguard Worker assert(c->cur_block);
2229*61046927SAndroid Build Coastguard Worker
2230*61046927SAndroid Build Coastguard Worker #if MESA_DEBUG
2231*61046927SAndroid Build Coastguard Worker /* We can only reuse a uniform if it was emitted in the same block,
2232*61046927SAndroid Build Coastguard Worker * so callers must make sure the current instruction is being emitted
2233*61046927SAndroid Build Coastguard Worker * in the current block.
2234*61046927SAndroid Build Coastguard Worker */
2235*61046927SAndroid Build Coastguard Worker bool found = false;
2236*61046927SAndroid Build Coastguard Worker vir_for_each_inst(inst, c->cur_block) {
2237*61046927SAndroid Build Coastguard Worker if (&inst->link == c->cursor.link) {
2238*61046927SAndroid Build Coastguard Worker found = true;
2239*61046927SAndroid Build Coastguard Worker break;
2240*61046927SAndroid Build Coastguard Worker }
2241*61046927SAndroid Build Coastguard Worker }
2242*61046927SAndroid Build Coastguard Worker
2243*61046927SAndroid Build Coastguard Worker assert(found || &c->cur_block->instructions == c->cursor.link);
2244*61046927SAndroid Build Coastguard Worker #endif
2245*61046927SAndroid Build Coastguard Worker
2246*61046927SAndroid Build Coastguard Worker list_for_each_entry_from_rev(struct qinst, inst, c->cursor.link->prev,
2247*61046927SAndroid Build Coastguard Worker &c->cur_block->instructions, link) {
2248*61046927SAndroid Build Coastguard Worker if ((inst->qpu.sig.ldunif || inst->qpu.sig.ldunifrf) &&
2249*61046927SAndroid Build Coastguard Worker inst->uniform == index) {
2250*61046927SAndroid Build Coastguard Worker prev_inst = inst;
2251*61046927SAndroid Build Coastguard Worker break;
2252*61046927SAndroid Build Coastguard Worker }
2253*61046927SAndroid Build Coastguard Worker
2254*61046927SAndroid Build Coastguard Worker if (--count == 0)
2255*61046927SAndroid Build Coastguard Worker break;
2256*61046927SAndroid Build Coastguard Worker }
2257*61046927SAndroid Build Coastguard Worker
2258*61046927SAndroid Build Coastguard Worker if (!prev_inst)
2259*61046927SAndroid Build Coastguard Worker return false;
2260*61046927SAndroid Build Coastguard Worker
2261*61046927SAndroid Build Coastguard Worker /* Only reuse the ldunif result if it was written to a temp register,
2262*61046927SAndroid Build Coastguard Worker * otherwise there may be special restrictions (for example, ldunif
2263*61046927SAndroid Build Coastguard Worker * may write directly to unifa, which is a write-only register).
2264*61046927SAndroid Build Coastguard Worker */
2265*61046927SAndroid Build Coastguard Worker if (prev_inst->dst.file != QFILE_TEMP)
2266*61046927SAndroid Build Coastguard Worker return false;
2267*61046927SAndroid Build Coastguard Worker
2268*61046927SAndroid Build Coastguard Worker list_for_each_entry_from(struct qinst, inst, prev_inst->link.next,
2269*61046927SAndroid Build Coastguard Worker &c->cur_block->instructions, link) {
2270*61046927SAndroid Build Coastguard Worker if (inst->dst.file == prev_inst->dst.file &&
2271*61046927SAndroid Build Coastguard Worker inst->dst.index == prev_inst->dst.index) {
2272*61046927SAndroid Build Coastguard Worker return false;
2273*61046927SAndroid Build Coastguard Worker }
2274*61046927SAndroid Build Coastguard Worker }
2275*61046927SAndroid Build Coastguard Worker
2276*61046927SAndroid Build Coastguard Worker *unif = prev_inst->dst;
2277*61046927SAndroid Build Coastguard Worker return true;
2278*61046927SAndroid Build Coastguard Worker }
2279*61046927SAndroid Build Coastguard Worker
2280*61046927SAndroid Build Coastguard Worker struct qreg
vir_uniform(struct v3d_compile * c,enum quniform_contents contents,uint32_t data)2281*61046927SAndroid Build Coastguard Worker vir_uniform(struct v3d_compile *c,
2282*61046927SAndroid Build Coastguard Worker enum quniform_contents contents,
2283*61046927SAndroid Build Coastguard Worker uint32_t data)
2284*61046927SAndroid Build Coastguard Worker {
2285*61046927SAndroid Build Coastguard Worker const int num_uniforms = c->num_uniforms;
2286*61046927SAndroid Build Coastguard Worker const int index = vir_get_uniform_index(c, contents, data);
2287*61046927SAndroid Build Coastguard Worker
2288*61046927SAndroid Build Coastguard Worker /* If this is not the first time we see this uniform try to reuse the
2289*61046927SAndroid Build Coastguard Worker * result of the last ldunif that loaded it.
2290*61046927SAndroid Build Coastguard Worker */
2291*61046927SAndroid Build Coastguard Worker const bool is_new_uniform = num_uniforms != c->num_uniforms;
2292*61046927SAndroid Build Coastguard Worker if (!is_new_uniform && !c->disable_ldunif_opt) {
2293*61046927SAndroid Build Coastguard Worker struct qreg ldunif_dst;
2294*61046927SAndroid Build Coastguard Worker if (try_opt_ldunif(c, index, &ldunif_dst))
2295*61046927SAndroid Build Coastguard Worker return ldunif_dst;
2296*61046927SAndroid Build Coastguard Worker }
2297*61046927SAndroid Build Coastguard Worker
2298*61046927SAndroid Build Coastguard Worker struct qinst *inst = vir_NOP(c);
2299*61046927SAndroid Build Coastguard Worker inst->qpu.sig.ldunif = true;
2300*61046927SAndroid Build Coastguard Worker inst->uniform = index;
2301*61046927SAndroid Build Coastguard Worker inst->dst = vir_get_temp(c);
2302*61046927SAndroid Build Coastguard Worker c->defs[inst->dst.index] = inst;
2303*61046927SAndroid Build Coastguard Worker return inst->dst;
2304*61046927SAndroid Build Coastguard Worker }
2305*61046927SAndroid Build Coastguard Worker
2306*61046927SAndroid Build Coastguard Worker #define OPTPASS(func) \
2307*61046927SAndroid Build Coastguard Worker do { \
2308*61046927SAndroid Build Coastguard Worker bool stage_progress = func(c); \
2309*61046927SAndroid Build Coastguard Worker if (stage_progress) { \
2310*61046927SAndroid Build Coastguard Worker progress = true; \
2311*61046927SAndroid Build Coastguard Worker if (print_opt_debug) { \
2312*61046927SAndroid Build Coastguard Worker fprintf(stderr, \
2313*61046927SAndroid Build Coastguard Worker "VIR opt pass %2d: %s progress\n", \
2314*61046927SAndroid Build Coastguard Worker pass, #func); \
2315*61046927SAndroid Build Coastguard Worker } \
2316*61046927SAndroid Build Coastguard Worker /*XXX vir_validate(c);*/ \
2317*61046927SAndroid Build Coastguard Worker } \
2318*61046927SAndroid Build Coastguard Worker } while (0)
2319*61046927SAndroid Build Coastguard Worker
2320*61046927SAndroid Build Coastguard Worker void
vir_optimize(struct v3d_compile * c)2321*61046927SAndroid Build Coastguard Worker vir_optimize(struct v3d_compile *c)
2322*61046927SAndroid Build Coastguard Worker {
2323*61046927SAndroid Build Coastguard Worker bool print_opt_debug = false;
2324*61046927SAndroid Build Coastguard Worker int pass = 1;
2325*61046927SAndroid Build Coastguard Worker
2326*61046927SAndroid Build Coastguard Worker while (true) {
2327*61046927SAndroid Build Coastguard Worker bool progress = false;
2328*61046927SAndroid Build Coastguard Worker
2329*61046927SAndroid Build Coastguard Worker OPTPASS(vir_opt_copy_propagate);
2330*61046927SAndroid Build Coastguard Worker OPTPASS(vir_opt_redundant_flags);
2331*61046927SAndroid Build Coastguard Worker OPTPASS(vir_opt_dead_code);
2332*61046927SAndroid Build Coastguard Worker OPTPASS(vir_opt_small_immediates);
2333*61046927SAndroid Build Coastguard Worker OPTPASS(vir_opt_constant_alu);
2334*61046927SAndroid Build Coastguard Worker
2335*61046927SAndroid Build Coastguard Worker if (!progress)
2336*61046927SAndroid Build Coastguard Worker break;
2337*61046927SAndroid Build Coastguard Worker
2338*61046927SAndroid Build Coastguard Worker pass++;
2339*61046927SAndroid Build Coastguard Worker }
2340*61046927SAndroid Build Coastguard Worker }
2341*61046927SAndroid Build Coastguard Worker
2342*61046927SAndroid Build Coastguard Worker const char *
vir_get_stage_name(struct v3d_compile * c)2343*61046927SAndroid Build Coastguard Worker vir_get_stage_name(struct v3d_compile *c)
2344*61046927SAndroid Build Coastguard Worker {
2345*61046927SAndroid Build Coastguard Worker if (c->vs_key && c->vs_key->is_coord)
2346*61046927SAndroid Build Coastguard Worker return "MESA_SHADER_VERTEX_BIN";
2347*61046927SAndroid Build Coastguard Worker else if (c->gs_key && c->gs_key->is_coord)
2348*61046927SAndroid Build Coastguard Worker return "MESA_SHADER_GEOMETRY_BIN";
2349*61046927SAndroid Build Coastguard Worker else
2350*61046927SAndroid Build Coastguard Worker return gl_shader_stage_name(c->s->info.stage);
2351*61046927SAndroid Build Coastguard Worker }
2352*61046927SAndroid Build Coastguard Worker
2353*61046927SAndroid Build Coastguard Worker static inline uint32_t
compute_vpm_size_in_sectors(const struct v3d_device_info * devinfo)2354*61046927SAndroid Build Coastguard Worker compute_vpm_size_in_sectors(const struct v3d_device_info *devinfo)
2355*61046927SAndroid Build Coastguard Worker {
2356*61046927SAndroid Build Coastguard Worker assert(devinfo->vpm_size > 0);
2357*61046927SAndroid Build Coastguard Worker const uint32_t sector_size = V3D_CHANNELS * sizeof(uint32_t) * 8;
2358*61046927SAndroid Build Coastguard Worker return devinfo->vpm_size / sector_size;
2359*61046927SAndroid Build Coastguard Worker }
2360*61046927SAndroid Build Coastguard Worker
2361*61046927SAndroid Build Coastguard Worker /* Computes various parameters affecting VPM memory configuration for programs
2362*61046927SAndroid Build Coastguard Worker * involving geometry shaders to ensure the program fits in memory and honors
2363*61046927SAndroid Build Coastguard Worker * requirements described in section "VPM usage" of the programming manual.
2364*61046927SAndroid Build Coastguard Worker */
2365*61046927SAndroid Build Coastguard Worker static bool
compute_vpm_config_gs(struct v3d_device_info * devinfo,struct v3d_vs_prog_data * vs,struct v3d_gs_prog_data * gs,struct vpm_config * vpm_cfg_out)2366*61046927SAndroid Build Coastguard Worker compute_vpm_config_gs(struct v3d_device_info *devinfo,
2367*61046927SAndroid Build Coastguard Worker struct v3d_vs_prog_data *vs,
2368*61046927SAndroid Build Coastguard Worker struct v3d_gs_prog_data *gs,
2369*61046927SAndroid Build Coastguard Worker struct vpm_config *vpm_cfg_out)
2370*61046927SAndroid Build Coastguard Worker {
2371*61046927SAndroid Build Coastguard Worker const uint32_t A = vs->separate_segments ? 1 : 0;
2372*61046927SAndroid Build Coastguard Worker const uint32_t Ad = vs->vpm_input_size;
2373*61046927SAndroid Build Coastguard Worker const uint32_t Vd = vs->vpm_output_size;
2374*61046927SAndroid Build Coastguard Worker
2375*61046927SAndroid Build Coastguard Worker const uint32_t vpm_size = compute_vpm_size_in_sectors(devinfo);
2376*61046927SAndroid Build Coastguard Worker
2377*61046927SAndroid Build Coastguard Worker /* Try to fit program into our VPM memory budget by adjusting
2378*61046927SAndroid Build Coastguard Worker * configurable parameters iteratively. We do this in two phases:
2379*61046927SAndroid Build Coastguard Worker * the first phase tries to fit the program into the total available
2380*61046927SAndroid Build Coastguard Worker * VPM memory. If we succeed at that, then the second phase attempts
2381*61046927SAndroid Build Coastguard Worker * to fit the program into half of that budget so we can run bin and
2382*61046927SAndroid Build Coastguard Worker * render programs in parallel.
2383*61046927SAndroid Build Coastguard Worker */
2384*61046927SAndroid Build Coastguard Worker struct vpm_config vpm_cfg[2];
2385*61046927SAndroid Build Coastguard Worker struct vpm_config *final_vpm_cfg = NULL;
2386*61046927SAndroid Build Coastguard Worker uint32_t phase = 0;
2387*61046927SAndroid Build Coastguard Worker
2388*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].As = 1;
2389*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gs = 1;
2390*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gd = gs->vpm_output_size;
2391*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].gs_width = gs->simd_width;
2392*61046927SAndroid Build Coastguard Worker
2393*61046927SAndroid Build Coastguard Worker /* While there is a requirement that Vc >= [Vn / 16], this is
2394*61046927SAndroid Build Coastguard Worker * always the case when tessellation is not present because in that
2395*61046927SAndroid Build Coastguard Worker * case Vn can only be 6 at most (when input primitive is triangles
2396*61046927SAndroid Build Coastguard Worker * with adjacency).
2397*61046927SAndroid Build Coastguard Worker *
2398*61046927SAndroid Build Coastguard Worker * We always choose Vc=2. We can't go lower than this due to GFXH-1744,
2399*61046927SAndroid Build Coastguard Worker * and Broadcom has not found it worth it to increase it beyond this
2400*61046927SAndroid Build Coastguard Worker * in general. Increasing Vc also increases VPM memory pressure which
2401*61046927SAndroid Build Coastguard Worker * can turn up being detrimental for performance in some scenarios.
2402*61046927SAndroid Build Coastguard Worker */
2403*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Vc = 2;
2404*61046927SAndroid Build Coastguard Worker
2405*61046927SAndroid Build Coastguard Worker /* Gv is a constraint on the hardware to not exceed the
2406*61046927SAndroid Build Coastguard Worker * specified number of vertex segments per GS batch. If adding a
2407*61046927SAndroid Build Coastguard Worker * new primitive to a GS batch would result in a range of more
2408*61046927SAndroid Build Coastguard Worker * than Gv vertex segments being referenced by the batch, then
2409*61046927SAndroid Build Coastguard Worker * the hardware will flush the batch and start a new one. This
2410*61046927SAndroid Build Coastguard Worker * means that we can choose any value we want, we just need to
2411*61046927SAndroid Build Coastguard Worker * be aware that larger values improve GS batch utilization
2412*61046927SAndroid Build Coastguard Worker * at the expense of more VPM memory pressure (which can affect
2413*61046927SAndroid Build Coastguard Worker * other performance aspects, such as GS dispatch width).
2414*61046927SAndroid Build Coastguard Worker * We start with the largest value, and will reduce it if we
2415*61046927SAndroid Build Coastguard Worker * find that total memory pressure is too high.
2416*61046927SAndroid Build Coastguard Worker */
2417*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gv = 3;
2418*61046927SAndroid Build Coastguard Worker do {
2419*61046927SAndroid Build Coastguard Worker /* When GS is present in absence of TES, then we need to satisfy
2420*61046927SAndroid Build Coastguard Worker * that Ve >= Gv. We go with the smallest value of Ve to avoid
2421*61046927SAndroid Build Coastguard Worker * increasing memory pressure.
2422*61046927SAndroid Build Coastguard Worker */
2423*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Ve = vpm_cfg[phase].Gv;
2424*61046927SAndroid Build Coastguard Worker
2425*61046927SAndroid Build Coastguard Worker uint32_t vpm_sectors =
2426*61046927SAndroid Build Coastguard Worker A * vpm_cfg[phase].As * Ad +
2427*61046927SAndroid Build Coastguard Worker (vpm_cfg[phase].Vc + vpm_cfg[phase].Ve) * Vd +
2428*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gs * vpm_cfg[phase].Gd;
2429*61046927SAndroid Build Coastguard Worker
2430*61046927SAndroid Build Coastguard Worker /* Ideally we want to use no more than half of the available
2431*61046927SAndroid Build Coastguard Worker * memory so we can execute a bin and render program in parallel
2432*61046927SAndroid Build Coastguard Worker * without stalls. If we achieved that then we are done.
2433*61046927SAndroid Build Coastguard Worker */
2434*61046927SAndroid Build Coastguard Worker if (vpm_sectors <= vpm_size / 2) {
2435*61046927SAndroid Build Coastguard Worker final_vpm_cfg = &vpm_cfg[phase];
2436*61046927SAndroid Build Coastguard Worker break;
2437*61046927SAndroid Build Coastguard Worker }
2438*61046927SAndroid Build Coastguard Worker
2439*61046927SAndroid Build Coastguard Worker /* At the very least, we should not allocate more than the
2440*61046927SAndroid Build Coastguard Worker * total available VPM memory. If we have a configuration that
2441*61046927SAndroid Build Coastguard Worker * succeeds at this we save it and continue to see if we can
2442*61046927SAndroid Build Coastguard Worker * meet the half-memory-use criteria too.
2443*61046927SAndroid Build Coastguard Worker */
2444*61046927SAndroid Build Coastguard Worker if (phase == 0 && vpm_sectors <= vpm_size) {
2445*61046927SAndroid Build Coastguard Worker vpm_cfg[1] = vpm_cfg[0];
2446*61046927SAndroid Build Coastguard Worker phase = 1;
2447*61046927SAndroid Build Coastguard Worker }
2448*61046927SAndroid Build Coastguard Worker
2449*61046927SAndroid Build Coastguard Worker /* Try lowering Gv */
2450*61046927SAndroid Build Coastguard Worker if (vpm_cfg[phase].Gv > 0) {
2451*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gv--;
2452*61046927SAndroid Build Coastguard Worker continue;
2453*61046927SAndroid Build Coastguard Worker }
2454*61046927SAndroid Build Coastguard Worker
2455*61046927SAndroid Build Coastguard Worker /* Try lowering GS dispatch width */
2456*61046927SAndroid Build Coastguard Worker if (vpm_cfg[phase].gs_width > 1) {
2457*61046927SAndroid Build Coastguard Worker do {
2458*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].gs_width >>= 1;
2459*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gd = align(vpm_cfg[phase].Gd, 2) / 2;
2460*61046927SAndroid Build Coastguard Worker } while (vpm_cfg[phase].gs_width == 2);
2461*61046927SAndroid Build Coastguard Worker
2462*61046927SAndroid Build Coastguard Worker /* Reset Gv to max after dropping dispatch width */
2463*61046927SAndroid Build Coastguard Worker vpm_cfg[phase].Gv = 3;
2464*61046927SAndroid Build Coastguard Worker continue;
2465*61046927SAndroid Build Coastguard Worker }
2466*61046927SAndroid Build Coastguard Worker
2467*61046927SAndroid Build Coastguard Worker /* We ran out of options to reduce memory pressure. If we
2468*61046927SAndroid Build Coastguard Worker * are at phase 1 we have at least a valid configuration, so we
2469*61046927SAndroid Build Coastguard Worker * we use that.
2470*61046927SAndroid Build Coastguard Worker */
2471*61046927SAndroid Build Coastguard Worker if (phase == 1)
2472*61046927SAndroid Build Coastguard Worker final_vpm_cfg = &vpm_cfg[0];
2473*61046927SAndroid Build Coastguard Worker break;
2474*61046927SAndroid Build Coastguard Worker } while (true);
2475*61046927SAndroid Build Coastguard Worker
2476*61046927SAndroid Build Coastguard Worker if (!final_vpm_cfg)
2477*61046927SAndroid Build Coastguard Worker return false;
2478*61046927SAndroid Build Coastguard Worker
2479*61046927SAndroid Build Coastguard Worker assert(final_vpm_cfg);
2480*61046927SAndroid Build Coastguard Worker assert(final_vpm_cfg->Gd <= 16);
2481*61046927SAndroid Build Coastguard Worker assert(final_vpm_cfg->Gv < 4);
2482*61046927SAndroid Build Coastguard Worker assert(final_vpm_cfg->Ve < 4);
2483*61046927SAndroid Build Coastguard Worker assert(final_vpm_cfg->Vc >= 2 && final_vpm_cfg->Vc <= 4);
2484*61046927SAndroid Build Coastguard Worker assert(final_vpm_cfg->gs_width == 1 ||
2485*61046927SAndroid Build Coastguard Worker final_vpm_cfg->gs_width == 4 ||
2486*61046927SAndroid Build Coastguard Worker final_vpm_cfg->gs_width == 8 ||
2487*61046927SAndroid Build Coastguard Worker final_vpm_cfg->gs_width == 16);
2488*61046927SAndroid Build Coastguard Worker
2489*61046927SAndroid Build Coastguard Worker *vpm_cfg_out = *final_vpm_cfg;
2490*61046927SAndroid Build Coastguard Worker return true;
2491*61046927SAndroid Build Coastguard Worker }
2492*61046927SAndroid Build Coastguard Worker
2493*61046927SAndroid Build Coastguard Worker bool
v3d_compute_vpm_config(struct v3d_device_info * devinfo,struct v3d_vs_prog_data * vs_bin,struct v3d_vs_prog_data * vs,struct v3d_gs_prog_data * gs_bin,struct v3d_gs_prog_data * gs,struct vpm_config * vpm_cfg_bin,struct vpm_config * vpm_cfg)2494*61046927SAndroid Build Coastguard Worker v3d_compute_vpm_config(struct v3d_device_info *devinfo,
2495*61046927SAndroid Build Coastguard Worker struct v3d_vs_prog_data *vs_bin,
2496*61046927SAndroid Build Coastguard Worker struct v3d_vs_prog_data *vs,
2497*61046927SAndroid Build Coastguard Worker struct v3d_gs_prog_data *gs_bin,
2498*61046927SAndroid Build Coastguard Worker struct v3d_gs_prog_data *gs,
2499*61046927SAndroid Build Coastguard Worker struct vpm_config *vpm_cfg_bin,
2500*61046927SAndroid Build Coastguard Worker struct vpm_config *vpm_cfg)
2501*61046927SAndroid Build Coastguard Worker {
2502*61046927SAndroid Build Coastguard Worker assert(vs && vs_bin);
2503*61046927SAndroid Build Coastguard Worker assert((gs != NULL) == (gs_bin != NULL));
2504*61046927SAndroid Build Coastguard Worker
2505*61046927SAndroid Build Coastguard Worker if (!gs) {
2506*61046927SAndroid Build Coastguard Worker vpm_cfg_bin->As = 1;
2507*61046927SAndroid Build Coastguard Worker vpm_cfg_bin->Ve = 0;
2508*61046927SAndroid Build Coastguard Worker vpm_cfg_bin->Vc = vs_bin->vcm_cache_size;
2509*61046927SAndroid Build Coastguard Worker
2510*61046927SAndroid Build Coastguard Worker vpm_cfg->As = 1;
2511*61046927SAndroid Build Coastguard Worker vpm_cfg->Ve = 0;
2512*61046927SAndroid Build Coastguard Worker vpm_cfg->Vc = vs->vcm_cache_size;
2513*61046927SAndroid Build Coastguard Worker } else {
2514*61046927SAndroid Build Coastguard Worker if (!compute_vpm_config_gs(devinfo, vs_bin, gs_bin, vpm_cfg_bin))
2515*61046927SAndroid Build Coastguard Worker return false;
2516*61046927SAndroid Build Coastguard Worker
2517*61046927SAndroid Build Coastguard Worker if (!compute_vpm_config_gs(devinfo, vs, gs, vpm_cfg))
2518*61046927SAndroid Build Coastguard Worker return false;
2519*61046927SAndroid Build Coastguard Worker }
2520*61046927SAndroid Build Coastguard Worker
2521*61046927SAndroid Build Coastguard Worker return true;
2522*61046927SAndroid Build Coastguard Worker }
2523