/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file
 *
 * Implements a pass that validates various invariants of the IR. The
 * current pass only validates that GRF uses are sane. More can be added
 * later.
 */

#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_eu.h"

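/* Validation assert helpers. Unlike a plain assert(), these print the
 * shader stage and the offending instruction before aborting, which makes
 * a failure much easier to trace back to the pass that produced the bad
 * IR. The _eq/_ne/_lte variants evaluate each operand exactly once and
 * print both values on failure.
 */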
#define fsv_assert(assertion)                                           \
   {                                                                    \
      if (!(assertion)) {                                               \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: '%s' failed\n", __FILE__, __LINE__, #assertion); \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_eq(A, B)                                             \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a != b) {                                                     \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A == B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, " A = %s = %u\n", #A, a);                      \
         fprintf(stderr, " B = %s = %u\n", #B, b);                      \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_ne(A, B)                                             \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a == b) {                                                     \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A != B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, " A = %s = %u\n", #A, a);                      \
         fprintf(stderr, " B = %s = %u\n", #B, b);                      \
         abort();                                                       \
      }                                                                 \
   }

#define fsv_assert_lte(A, B)                                            \
   {                                                                    \
      unsigned a = (A);                                                 \
      unsigned b = (B);                                                 \
      if (a > b) {                                                      \
         fprintf(stderr, "ASSERT: Scalar %s validation failed!\n",      \
                 _mesa_shader_stage_to_abbrev(s.stage));                \
         brw_print_instruction(s, inst, stderr);                        \
         fprintf(stderr, "%s:%d: A <= B failed\n", __FILE__, __LINE__); \
         fprintf(stderr, " A = %s = %u\n", #A, a);                      \
         fprintf(stderr, " B = %s = %u\n", #B, b);                      \
         abort();                                                       \
      }                                                                 \
   }

#ifndef NDEBUG
static inline bool
is_ud_imm(const brw_reg &reg)
{
   return reg.file == IMM && reg.type == BRW_TYPE_UD;
}

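/* Check the invariants of the logical memory instructions: every immediate
 * control source really is a UD immediate, the data sources and destination
 * match the declared data size and component count, and the opcode-specific
 * rules for loads, stores, and atomics hold.
 */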
static void
validate_memory_logical(const fs_visitor &s, const fs_inst *inst)
{
   const intel_device_info *devinfo = s.devinfo;

   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_OPCODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_MODE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_BINDING_TYPE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COORD_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_ALIGNMENT]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_DATA_SIZE]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_COMPONENTS]));
   fsv_assert(is_ud_imm(inst->src[MEMORY_LOGICAL_FLAGS]));

   enum lsc_data_size data_size =
      (enum lsc_data_size) inst->src[MEMORY_LOGICAL_DATA_SIZE].ud;
   unsigned data_size_B = lsc_data_size_bytes(data_size);

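   /* Platforms without native LSC only accept a subset of the LSC data
    * sizes here, presumably those with a direct equivalent in the legacy
    * data-port messages they are lowered to.
    */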
   if (!devinfo->has_lsc) {
      fsv_assert(data_size == LSC_DATA_SIZE_D8U32 ||
                 data_size == LSC_DATA_SIZE_D16U32 ||
                 data_size == LSC_DATA_SIZE_D32 ||
                 data_size == LSC_DATA_SIZE_D64);
   }

   enum lsc_opcode op = (enum lsc_opcode) inst->src[MEMORY_LOGICAL_OPCODE].ud;
   enum memory_flags flags = (memory_flags)inst->src[MEMORY_LOGICAL_FLAGS].ud;
   bool transpose = flags & MEMORY_FLAG_TRANSPOSE;
   bool include_helpers = flags & MEMORY_FLAG_INCLUDE_HELPERS;

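   /* Transposed (block) accesses operate as a single channel, so asking to
    * include helper invocations would be meaningless, and only some LSC
    * opcodes support the transposed layout at all.
    */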
   fsv_assert(!transpose || !include_helpers);
   fsv_assert(!transpose || lsc_opcode_has_transpose(op));

   if (inst->src[MEMORY_LOGICAL_BINDING_TYPE].ud == LSC_ADDR_SURFTYPE_FLAT)
      fsv_assert(inst->src[MEMORY_LOGICAL_BINDING].file == BAD_FILE);

   if (inst->src[MEMORY_LOGICAL_DATA1].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA1));

      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].type ==
                 inst->src[MEMORY_LOGICAL_DATA1].type);
   }

   if (inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE) {
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud ==
                 inst->components_read(MEMORY_LOGICAL_DATA0));

      fsv_assert(brw_type_size_bytes(inst->src[MEMORY_LOGICAL_DATA0].type) ==
                 data_size_B);
   }

   if (inst->dst.file != BAD_FILE)
      fsv_assert(brw_type_size_bytes(inst->dst.type) == data_size_B);

   switch (inst->opcode) {
   case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
      fsv_assert(op == LSC_OP_LOAD || op == LSC_OP_LOAD_CMASK);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;
   case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
      fsv_assert(lsc_opcode_is_store(op));
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA0].file != BAD_FILE);
      fsv_assert(inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE);
      break;
   case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
      fsv_assert(lsc_opcode_is_atomic(op));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA0].file == BAD_FILE)
                 == (lsc_op_num_data_values(op) < 1));
      fsv_assert((inst->src[MEMORY_LOGICAL_DATA1].file == BAD_FILE)
                 == (lsc_op_num_data_values(op) < 2));
      fsv_assert(inst->src[MEMORY_LOGICAL_COMPONENTS].ud == 1);
      fsv_assert(!include_helpers);
      break;
   default:
      unreachable("invalid opcode");
   }
}

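/* Top-level validator for the scalar backend IR. Walks every instruction
 * in the CFG and checks the invariants above; in debug builds this is
 * typically run after each transformation pass. The whole validator is
 * compiled away when NDEBUG is defined.
 */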
void
brw_fs_validate(const fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;

   s.cfg->validate(_mesa_shader_stage_to_abbrev(s.stage));

   foreach_block_and_inst (block, fs_inst, inst, s.cfg) {
      switch (inst->opcode) {
      case SHADER_OPCODE_SEND:
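         /* The message descriptor and extended descriptor of a SEND live
          * in src[0] and src[1]; both must be scalar values.
          */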
         fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
         break;

      case BRW_OPCODE_MOV:
         fsv_assert(inst->sources == 1);
         break;

      case SHADER_OPCODE_MEMORY_LOAD_LOGICAL:
      case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
      case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
         validate_memory_logical(s, inst);
         break;

      default:
         break;
      }

      /* On Xe2, the "write the accumulator in addition to the explicit
       * destination" bit no longer exists. Try to catch uses of this
       * feature earlier in the process.
       */
      if (devinfo->ver >= 20 && inst->writes_accumulator) {
         fsv_assert(inst->dst.is_accumulator() ||
                    inst->opcode == BRW_OPCODE_ADDC ||
                    inst->opcode == BRW_OPCODE_MACH ||
                    inst->opcode == BRW_OPCODE_SUBB);
      }

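      /* Three-source instructions (e.g. MAD) are more restricted than the
       * general case: their sources may not mix integer and float types,
       * and only a few regioning patterns are representable.
       */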
      if (inst->is_3src(s.compiler)) {
         const unsigned integer_sources =
            brw_type_is_int(inst->src[0].type) +
            brw_type_is_int(inst->src[1].type) +
            brw_type_is_int(inst->src[2].type);
         const unsigned float_sources =
            brw_type_is_float(inst->src[0].type) +
            brw_type_is_float(inst->src[1].type) +
            brw_type_is_float(inst->src[2].type);

         fsv_assert((integer_sources == 3 && float_sources == 0) ||
                    (integer_sources == 0 && float_sources == 3));

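         /* Check that each source uses a vertical stride the hardware can
          * encode: 0, 4, 8, and 16 are always valid on Gfx10+, vstride 1
          * only exists on Gfx12+, and vstride 2 was dropped after Gfx11.
          */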
         if (devinfo->ver >= 10) {
            for (unsigned i = 0; i < 3; i++) {
               if (inst->src[i].file == IMM)
                  continue;

               switch (inst->src[i].vstride) {
               case BRW_VERTICAL_STRIDE_0:
               case BRW_VERTICAL_STRIDE_4:
               case BRW_VERTICAL_STRIDE_8:
               case BRW_VERTICAL_STRIDE_16:
                  break;

               case BRW_VERTICAL_STRIDE_1:
                  fsv_assert_lte(12, devinfo->ver);
                  break;

               case BRW_VERTICAL_STRIDE_2:
                  fsv_assert_lte(devinfo->ver, 11);
                  break;

               default:
                  fsv_assert(!"invalid vstride");
                  break;
               }
            }
         } else if (s.grf_used != 0) {
            /* Only perform the pre-Gfx10 checks after register allocation
             * has occurred.
             *
             * Many passes (e.g., constant copy propagation) will generate
             * invalid 3-source instructions with the expectation that
             * later passes (e.g., combine constants) will fix them.
             */
            for (unsigned i = 0; i < 3; i++) {
               fsv_assert_ne(inst->src[i].file, IMM);

               /* A stride of 1 (the usual case) or 0, with a special
                * "repctrl" bit, is allowed. The repctrl bit doesn't work for
                * 64-bit datatypes, so if the source type is 64-bit then only
                * a stride of 1 is allowed. From the Broadwell PRM, Volume 7
                * "3D Media GPGPU", page 944:
                *
                *    This is applicable to 32b datatypes and 16b datatype.
                *    64b datatypes cannot use the replicate control.
                */
               fsv_assert_lte(inst->src[i].vstride, 1);

               if (brw_type_size_bytes(inst->src[i].type) > 4)
                  fsv_assert_eq(inst->src[i].vstride, 1);
            }
         }
      }

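      /* Every VGRF access, for both the destination and each source, must
       * stay within the bounds of the virtual register's allocation.
       */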
      if (inst->dst.file == VGRF) {
         fsv_assert_lte(inst->dst.offset / REG_SIZE + regs_written(inst),
                        s.alloc.sizes[inst->dst.nr]);
      }

      for (unsigned i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == VGRF) {
            fsv_assert_lte(inst->src[i].offset / REG_SIZE + regs_read(inst, i),
                           s.alloc.sizes[inst->src[i].nr]);
         }
      }

      /* Accumulator Registers, bspec 47251:
       *
       *    "When destination is accumulator with offset 0, destination
       *    horizontal stride must be 1."
       */
      if (intel_needs_workaround(devinfo, 14014617373) &&
          inst->dst.is_accumulator() &&
          phys_subnr(devinfo, inst->dst) == 0) {
         fsv_assert_eq(inst->dst.hstride, 1);
      }

      if (inst->is_math() && intel_needs_workaround(devinfo, 22016140776)) {
         /* Wa_22016140776:
          *
          *    Scalar broadcast on HF math (packed or unpacked) must not be
          *    used. Compiler must use a mov instruction to expand the scalar
          *    value to a vector before using in a HF (packed or unpacked)
          *    math operation.
          *
          * Since copy propagation knows about this restriction, nothing
          * should be able to generate these invalid source strides. Detect
          * potential problems sooner rather than later.
          */
         for (unsigned i = 0; i < inst->sources; i++) {
            fsv_assert(!is_uniform(inst->src[i]) ||
                       inst->src[i].type != BRW_TYPE_HF);
         }
      }
   }
}
#endif