/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir_instr_set.h"
#include "util/half_float.h"
#include "nir_vla.h"

/* This function determines if uses of an instruction can safely be rewritten
 * to use another identical instruction instead. Note that this function must
 * be kept in sync with hash_instr() and nir_instrs_equal() -- only
 * instructions that pass this test will be handed on to those functions, and
 * conversely they must handle everything that this function returns true for.
 */
static bool
instr_can_rewrite(const nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_load_const:
   case nir_instr_type_phi:
      return true;
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      switch (intr->intrinsic) {
      case nir_intrinsic_ddx:
      case nir_intrinsic_ddx_fine:
      case nir_intrinsic_ddx_coarse:
      case nir_intrinsic_ddy:
      case nir_intrinsic_ddy_fine:
      case nir_intrinsic_ddy_coarse:
         /* Derivatives are not CAN_REORDER, because we cannot move derivatives
          * across terminates if that would lose helper invocations. However,
          * they can be CSE'd as a special case - if it is legal to execute a
          * derivative at instruction A, then it is also legal to execute the
          * derivative from instruction B. So we can hoist up the derivatives as
          * CSE is inclined to without a problem.
          */
         return true;
      default:
         return nir_intrinsic_can_reorder(intr);
      }
   }
   case nir_instr_type_debug_info:
      return nir_instr_as_debug_info(instr)->type == nir_debug_info_string;
   case nir_instr_type_call:
   case nir_instr_type_jump:
   case nir_instr_type_undef:
      return false;
   case nir_instr_type_parallel_copy:
   default:
      unreachable("Invalid instruction type");
   }

   return false;
}

#define HASH(hash, data) XXH32(&(data), sizeof(data), hash)

static uint32_t
hash_src(uint32_t hash, const nir_src *src)
{
   hash = HASH(hash, src->ssa);
   return hash;
}

static uint32_t
hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components)
{
   for (unsigned i = 0; i < num_components; i++)
      hash = HASH(hash, src->swizzle[i]);

   hash = hash_src(hash, &src->src);
   return hash;
}
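/* Hashes an ALU instruction. For 2-src-commutative ops, the first two
 * source hashes are combined commutatively, so that e.g. fadd(a, b) and
 * fadd(b, a) hash identically (example op for illustration; the
 * authoritative check is NIR_OP_IS_2SRC_COMMUTATIVE below).
 */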
static uint32_t
hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
   /* We explicitly don't hash instr->exact. */
   uint8_t flags = instr->no_signed_wrap |
                   instr->no_unsigned_wrap << 1;
   uint8_t v[8];
   v[0] = flags;
   v[1] = instr->def.num_components;
   v[2] = instr->def.bit_size;
   v[3] = 0;
   uint32_t op = instr->op;
   memcpy(v + 4, &op, sizeof(op));
   hash = XXH32(v, sizeof(v), hash);

   if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
      assert(nir_op_infos[instr->op].num_inputs >= 2);

      uint32_t hash0 = hash_alu_src(hash, &instr->src[0],
                                    nir_ssa_alu_instr_src_components(instr, 0));
      uint32_t hash1 = hash_alu_src(hash, &instr->src[1],
                                    nir_ssa_alu_instr_src_components(instr, 1));
      /* For commutative operations, we need some commutative way of
       * combining the hashes. One option would be to XOR them but that
       * means that anything with two identical sources will hash to 0 and
       * that's common enough we probably don't want the guaranteed
       * collision. Either addition or multiplication will also work.
       */
      hash = hash0 * hash1;

      for (unsigned i = 2; i < nir_op_infos[instr->op].num_inputs; i++) {
         hash = hash_alu_src(hash, &instr->src[i],
                             nir_ssa_alu_instr_src_components(instr, i));
      }
   } else {
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         hash = hash_alu_src(hash, &instr->src[i],
                             nir_ssa_alu_instr_src_components(instr, i));
      }
   }

   return hash;
}

static uint32_t
hash_deref(uint32_t hash, const nir_deref_instr *instr)
{
   uint32_t v[4];
   v[0] = instr->deref_type;
   v[1] = instr->modes;
   uint64_t type = (uintptr_t)instr->type;
   memcpy(v + 2, &type, sizeof(type));
   hash = XXH32(v, sizeof(v), hash);

   if (instr->deref_type == nir_deref_type_var)
      return HASH(hash, instr->var);

   hash = hash_src(hash, &instr->parent);

   switch (instr->deref_type) {
   case nir_deref_type_struct:
      hash = HASH(hash, instr->strct.index);
      break;

   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      hash = hash_src(hash, &instr->arr.index);
      hash = HASH(hash, instr->arr.in_bounds);
      break;

   case nir_deref_type_cast:
      hash = HASH(hash, instr->cast.ptr_stride);
      hash = HASH(hash, instr->cast.align_mul);
      hash = HASH(hash, instr->cast.align_offset);
      break;

   case nir_deref_type_var:
   case nir_deref_type_array_wildcard:
      /* Nothing to do */
      break;

   default:
      unreachable("Invalid instruction deref type");
   }

   return hash;
}

static uint32_t
hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
{
   hash = HASH(hash, instr->def.num_components);

   if (instr->def.bit_size == 1) {
      /* Only the .b member of the value union is defined for 1-bit
       * booleans, so hash it per component rather than hashing the raw
       * union bytes.
       */
      for (unsigned i = 0; i < instr->def.num_components; i++) {
         uint8_t b = instr->value[i].b;
         hash = HASH(hash, b);
      }
   } else {
      unsigned size = instr->def.num_components * sizeof(*instr->value);
      hash = XXH32(instr->value, size, hash);
   }

   return hash;
}

static int
cmp_phi_src(const void *data1, const void *data2)
{
   nir_phi_src *src1 = *(nir_phi_src **)data1;
   nir_phi_src *src2 = *(nir_phi_src **)data2;
   return src1->pred > src2->pred ? 1 : (src1->pred == src2->pred ? 0 : -1);
}
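/* Hashes a phi. The (source, predecessor) pairs of a phi form an unordered
 * set, so, as in hash_alu() for commutative ops, each pair is hashed on its
 * own and the per-pair hashes are combined with a commutative multiply.
 */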
static uint32_t
hash_phi(uint32_t hash, const nir_phi_instr *instr)
{
   hash = HASH(hash, instr->instr.block);

   /* Similar to hash_alu(), combine the hashes commutatively. */
   nir_foreach_phi_src(src, instr)
      hash *= HASH(hash_src(0, &src->src), src->pred);

   return hash;
}

static uint32_t
hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
   hash = HASH(hash, instr->intrinsic);

   if (info->has_dest) {
      uint8_t v[4] = { instr->def.num_components, instr->def.bit_size, 0, 0 };
      hash = XXH32(v, sizeof(v), hash);
   }

   hash = XXH32(instr->const_index,
                info->num_indices * sizeof(instr->const_index[0]), hash);

   for (unsigned i = 0; i < nir_intrinsic_infos[instr->intrinsic].num_srcs; i++)
      hash = hash_src(hash, &instr->src[i]);

   return hash;
}

static uint32_t
hash_tex(uint32_t hash, const nir_tex_instr *instr)
{
   uint8_t v[24];
   v[0] = instr->op;
   v[1] = instr->num_srcs;
   v[2] = instr->coord_components | (instr->sampler_dim << 4);
   uint8_t flags = instr->is_array | (instr->is_shadow << 1) |
                   (instr->is_new_style_shadow << 2) |
                   (instr->is_sparse << 3) | (instr->component << 4) |
                   (instr->texture_non_uniform << 6) |
                   (instr->sampler_non_uniform << 7);
   v[3] = flags;
   STATIC_ASSERT(sizeof(instr->tg4_offsets) == 8);
   memcpy(v + 4, instr->tg4_offsets, 8);
   uint32_t texture_index = instr->texture_index;
   uint32_t sampler_index = instr->sampler_index;
   uint32_t backend_flags = instr->backend_flags;
   memcpy(v + 12, &texture_index, 4);
   memcpy(v + 16, &sampler_index, 4);
   memcpy(v + 20, &backend_flags, 4);
   hash = XXH32(v, sizeof(v), hash);

   for (unsigned i = 0; i < instr->num_srcs; i++)
      hash *= hash_src(0, &instr->src[i].src);

   return hash;
}

static uint32_t
hash_debug_info(uint32_t hash, const nir_debug_info_instr *instr)
{
   assert(instr->type == nir_debug_info_string);
   return XXH32(instr->string, instr->string_length, hash);
}
281 */ 282 283 static uint32_t hash_instr(const void * data)284 hash_instr(const void *data) 285 { 286 const nir_instr *instr = data; 287 uint32_t hash = 0; 288 289 switch (instr->type) { 290 case nir_instr_type_alu: 291 hash = hash_alu(hash, nir_instr_as_alu(instr)); 292 break; 293 case nir_instr_type_deref: 294 hash = hash_deref(hash, nir_instr_as_deref(instr)); 295 break; 296 case nir_instr_type_load_const: 297 hash = hash_load_const(hash, nir_instr_as_load_const(instr)); 298 break; 299 case nir_instr_type_phi: 300 hash = hash_phi(hash, nir_instr_as_phi(instr)); 301 break; 302 case nir_instr_type_intrinsic: 303 hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr)); 304 break; 305 case nir_instr_type_tex: 306 hash = hash_tex(hash, nir_instr_as_tex(instr)); 307 break; 308 case nir_instr_type_debug_info: 309 hash = hash_debug_info(hash, nir_instr_as_debug_info(instr)); 310 break; 311 default: 312 unreachable("Invalid instruction type"); 313 } 314 315 return hash; 316 } 317 318 bool nir_srcs_equal(nir_src src1,nir_src src2)319 nir_srcs_equal(nir_src src1, nir_src src2) 320 { 321 return src1.ssa == src2.ssa; 322 } 323 324 /** 325 * If the \p s is an SSA value that was generated by a negation instruction, 326 * that instruction is returned as a \c nir_alu_instr. Otherwise \c NULL is 327 * returned. 328 */ 329 static nir_alu_instr * get_neg_instr(nir_src s)330 get_neg_instr(nir_src s) 331 { 332 nir_alu_instr *alu = nir_src_as_alu_instr(s); 333 334 return alu != NULL && (alu->op == nir_op_fneg || alu->op == nir_op_ineg) 335 ? alu 336 : NULL; 337 } 338 339 bool nir_const_value_negative_equal(nir_const_value c1,nir_const_value c2,nir_alu_type full_type)340 nir_const_value_negative_equal(nir_const_value c1, 341 nir_const_value c2, 342 nir_alu_type full_type) 343 { 344 assert(nir_alu_type_get_base_type(full_type) != nir_type_invalid); 345 assert(nir_alu_type_get_type_size(full_type) != 0); 346 347 switch (full_type) { 348 case nir_type_float16: 349 return _mesa_half_to_float(c1.u16) == -_mesa_half_to_float(c2.u16); 350 351 case nir_type_float32: 352 return c1.f32 == -c2.f32; 353 354 case nir_type_float64: 355 return c1.f64 == -c2.f64; 356 357 case nir_type_int8: 358 case nir_type_uint8: 359 return c1.i8 == -c2.i8; 360 361 case nir_type_int16: 362 case nir_type_uint16: 363 return c1.i16 == -c2.i16; 364 365 case nir_type_int32: 366 case nir_type_uint32: 367 return c1.i32 == -c2.i32; 368 369 case nir_type_int64: 370 case nir_type_uint64: 371 return c1.i64 == -c2.i64; 372 373 default: 374 break; 375 } 376 377 return false; 378 } 379 380 /** 381 * Shallow compare of ALU srcs to determine if one is the negation of the other 382 * 383 * This function detects cases where \p alu1 is a constant and \p alu2 is a 384 * constant that is its negation. It will also detect cases where \p alu2 is 385 * an SSA value that is a \c nir_op_fneg applied to \p alu1 (and vice versa). 386 * 387 * This function does not detect the general case when \p alu1 and \p alu2 are 388 * SSA values that are the negations of each other (e.g., \p alu1 represents 389 * (a * b) and \p alu2 represents (-a * b)). 390 * 391 * \warning 392 * It is the responsibility of the caller to ensure that the component counts, 393 * write masks, and base types of the sources being compared are compatible. 
/**
 * Shallow compare of ALU srcs to determine if one is the negation of the other
 *
 * This function detects cases where \p alu1 is a constant and \p alu2 is a
 * constant that is its negation. It will also detect cases where \p alu2 is
 * an SSA value that is a \c nir_op_fneg applied to \p alu1 (and vice versa).
 *
 * This function does not detect the general case when \p alu1 and \p alu2 are
 * SSA values that are the negations of each other (e.g., \p alu1 represents
 * (a * b) and \p alu2 represents (-a * b)).
 *
 * \warning
 * It is the responsibility of the caller to ensure that the component counts,
 * write masks, and base types of the sources being compared are compatible.
 */
bool
nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                            const nir_alu_instr *alu2,
                            unsigned src1, unsigned src2)
{
#ifndef NDEBUG
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
      assert(nir_alu_instr_channel_used(alu1, src1, i) ==
             nir_alu_instr_channel_used(alu2, src2, i));
   }

   if (nir_alu_type_get_base_type(nir_op_infos[alu1->op].input_types[src1]) == nir_type_float) {
      assert(nir_op_infos[alu1->op].input_types[src1] ==
             nir_op_infos[alu2->op].input_types[src2]);
   } else {
      assert(nir_op_infos[alu1->op].input_types[src1] == nir_type_int);
      assert(nir_op_infos[alu2->op].input_types[src2] == nir_type_int);
   }
#endif

   /* Handling load_const instructions is tricky. */

   const nir_const_value *const const1 =
      nir_src_as_const_value(alu1->src[src1].src);

   if (const1 != NULL) {
      const nir_const_value *const const2 =
         nir_src_as_const_value(alu2->src[src2].src);

      if (const2 == NULL)
         return false;

      if (nir_src_bit_size(alu1->src[src1].src) !=
          nir_src_bit_size(alu2->src[src2].src))
         return false;

      const nir_alu_type full_type = nir_op_infos[alu1->op].input_types[src1] |
                                     nir_src_bit_size(alu1->src[src1].src);
      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
         if (nir_alu_instr_channel_used(alu1, src1, i) &&
             !nir_const_value_negative_equal(const1[alu1->src[src1].swizzle[i]],
                                             const2[alu2->src[src2].swizzle[i]],
                                             full_type))
            return false;
      }

      return true;
   }

   uint8_t alu1_swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
   nir_src alu1_actual_src;
   nir_alu_instr *neg1 = get_neg_instr(alu1->src[src1].src);

   /* parity is true when exactly one of the two sources is wrapped in a
    * negation.
    */
   bool parity = false;

   if (neg1) {
      parity = !parity;
      alu1_actual_src = neg1->src[0].src;

      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg1, 0); i++)
         alu1_swizzle[i] = neg1->src[0].swizzle[i];
   } else {
      alu1_actual_src = alu1->src[src1].src;

      for (unsigned i = 0; i < nir_src_num_components(alu1_actual_src); i++)
         alu1_swizzle[i] = i;
   }

   uint8_t alu2_swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
   nir_src alu2_actual_src;
   nir_alu_instr *neg2 = get_neg_instr(alu2->src[src2].src);

   if (neg2) {
      parity = !parity;
      alu2_actual_src = neg2->src[0].src;

      for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(neg2, 0); i++)
         alu2_swizzle[i] = neg2->src[0].swizzle[i];
   } else {
      alu2_actual_src = alu2->src[src2].src;

      for (unsigned i = 0; i < nir_src_num_components(alu2_actual_src); i++)
         alu2_swizzle[i] = i;
   }
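   /* alu1_swizzle/alu2_swizzle now map each component through any
    * intervening fneg/ineg back to the underlying SSA def, so the loop
    * below can compare the composed swizzles channel by channel.
    */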
   /* Bail early if sources are not equal or we don't have parity. */
   if (!parity || !nir_srcs_equal(alu1_actual_src, alu2_actual_src))
      return false;

   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
      if (alu1_swizzle[alu1->src[src1].swizzle[i]] !=
          alu2_swizzle[alu2->src[src2].swizzle[i]])
         return false;
   }

   return true;
}

bool
nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                   unsigned src1, unsigned src2)
{
   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
      if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
         return false;
   }

   return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
}
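/* Note that nir_alu_srcs_equal() compares swizzles channel by channel:
 * two sources reading the same SSA def as .xy and .yx are not equal
 * (illustrative swizzles).
 */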
/* Returns "true" if two instructions are equal. Note that this will only
 * work for the subset of instructions defined by instr_can_rewrite(). Also,
 * it should only return "true" for instructions that hash_instr() will return
 * the same hash for (ignoring collisions, of course).
 */
bool
nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
{
   assert(instr_can_rewrite(instr1) && instr_can_rewrite(instr2));

   if (instr1->type != instr2->type)
      return false;

   switch (instr1->type) {
   case nir_instr_type_alu: {
      nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
      nir_alu_instr *alu2 = nir_instr_as_alu(instr2);

      if (alu1->op != alu2->op)
         return false;

      /* We explicitly don't compare instr->exact. */

      if (alu1->no_signed_wrap != alu2->no_signed_wrap)
         return false;

      if (alu1->no_unsigned_wrap != alu2->no_unsigned_wrap)
         return false;

      /* TODO: We can probably actually do something more intelligent such
       * as allowing different numbers and taking a maximum or something
       * here.
       */
      if (alu1->def.num_components != alu2->def.num_components)
         return false;

      if (alu1->def.bit_size != alu2->def.bit_size)
         return false;

      if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_2SRC_COMMUTATIVE) {
         if ((!nir_alu_srcs_equal(alu1, alu2, 0, 0) ||
              !nir_alu_srcs_equal(alu1, alu2, 1, 1)) &&
             (!nir_alu_srcs_equal(alu1, alu2, 0, 1) ||
              !nir_alu_srcs_equal(alu1, alu2, 1, 0)))
            return false;

         for (unsigned i = 2; i < nir_op_infos[alu1->op].num_inputs; i++) {
            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
               return false;
         }
      } else {
         for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
               return false;
         }
      }
      return true;
   }
   case nir_instr_type_deref: {
      nir_deref_instr *deref1 = nir_instr_as_deref(instr1);
      nir_deref_instr *deref2 = nir_instr_as_deref(instr2);

      if (deref1->deref_type != deref2->deref_type ||
          deref1->modes != deref2->modes ||
          deref1->type != deref2->type)
         return false;

      if (deref1->deref_type == nir_deref_type_var)
         return deref1->var == deref2->var;

      if (!nir_srcs_equal(deref1->parent, deref2->parent))
         return false;

      switch (deref1->deref_type) {
      case nir_deref_type_struct:
         if (deref1->strct.index != deref2->strct.index)
            return false;
         break;

      case nir_deref_type_array:
      case nir_deref_type_ptr_as_array:
         if (!nir_srcs_equal(deref1->arr.index, deref2->arr.index))
            return false;
         if (deref1->arr.in_bounds != deref2->arr.in_bounds)
            return false;
         break;

      case nir_deref_type_cast:
         if (deref1->cast.ptr_stride != deref2->cast.ptr_stride ||
             deref1->cast.align_mul != deref2->cast.align_mul ||
             deref1->cast.align_offset != deref2->cast.align_offset)
            return false;
         break;

      case nir_deref_type_var:
      case nir_deref_type_array_wildcard:
         /* Nothing to do */
         break;

      default:
         unreachable("Invalid instruction deref type");
      }
      return true;
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
      nir_tex_instr *tex2 = nir_instr_as_tex(instr2);

      if (tex1->op != tex2->op)
         return false;

      if (tex1->num_srcs != tex2->num_srcs)
         return false;
      for (unsigned i = 0; i < tex1->num_srcs; i++) {
         if (tex1->src[i].src_type != tex2->src[i].src_type ||
             !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
            return false;
         }
      }

      if (tex1->coord_components != tex2->coord_components ||
          tex1->sampler_dim != tex2->sampler_dim ||
          tex1->is_array != tex2->is_array ||
          tex1->is_shadow != tex2->is_shadow ||
          tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
          tex1->component != tex2->component ||
          tex1->texture_index != tex2->texture_index ||
          tex1->sampler_index != tex2->sampler_index ||
          tex1->backend_flags != tex2->backend_flags) {
         return false;
      }

      if (memcmp(tex1->tg4_offsets, tex2->tg4_offsets,
                 sizeof(tex1->tg4_offsets)))
         return false;

      return true;
   }
   case nir_instr_type_load_const: {
      nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
      nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);

      if (load1->def.num_components != load2->def.num_components)
         return false;

      if (load1->def.bit_size != load2->def.bit_size)
         return false;

      if (load1->def.bit_size == 1) {
         for (unsigned i = 0; i < load1->def.num_components; ++i) {
            if (load1->value[i].b != load2->value[i].b)
               return false;
         }
      } else {
         unsigned size = load1->def.num_components * sizeof(*load1->value);
         if (memcmp(load1->value, load2->value, size) != 0)
            return false;
      }
      return true;
   }
   case nir_instr_type_phi: {
      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);

      if (phi1->instr.block != phi2->instr.block)
         return false;

      /* In case of phis with no sources, the dest needs to be checked
       * to ensure that phis with incompatible dests won't get merged
       * during CSE.
       */
      if (phi1->def.num_components != phi2->def.num_components)
         return false;
      if (phi1->def.bit_size != phi2->def.bit_size)
         return false;

      nir_foreach_phi_src(src1, phi1) {
         nir_foreach_phi_src(src2, phi2) {
            if (src1->pred == src2->pred) {
               if (!nir_srcs_equal(src1->src, src2->src))
                  return false;

               break;
            }
         }
      }

      return true;
   }
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
      nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
      const nir_intrinsic_info *info =
         &nir_intrinsic_infos[intrinsic1->intrinsic];

      if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
          intrinsic1->num_components != intrinsic2->num_components)
         return false;

      if (info->has_dest && intrinsic1->def.num_components !=
                               intrinsic2->def.num_components)
         return false;

      if (info->has_dest && intrinsic1->def.bit_size !=
                               intrinsic2->def.bit_size)
         return false;

      for (unsigned i = 0; i < info->num_srcs; i++) {
         if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
            return false;
      }

      for (unsigned i = 0; i < info->num_indices; i++) {
         if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
            return false;
      }

      return true;
   }
   case nir_instr_type_debug_info: {
      nir_debug_info_instr *di1 = nir_instr_as_debug_info(instr1);
      nir_debug_info_instr *di2 = nir_instr_as_debug_info(instr2);

      assert(di1->type == nir_debug_info_string);
      assert(di2->type == nir_debug_info_string);

      return di1->string_length == di2->string_length &&
             !memcmp(di1->string, di2->string, di1->string_length);
   }
   case nir_instr_type_call:
   case nir_instr_type_jump:
   case nir_instr_type_undef:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("Invalid instruction type");
   }

   unreachable("All cases in the above switch should return");
}

static nir_def *
nir_instr_get_def_def(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return &nir_instr_as_alu(instr)->def;
   case nir_instr_type_deref:
      return &nir_instr_as_deref(instr)->def;
   case nir_instr_type_load_const:
      return &nir_instr_as_load_const(instr)->def;
   case nir_instr_type_phi:
      return &nir_instr_as_phi(instr)->def;
   case nir_instr_type_intrinsic:
      return &nir_instr_as_intrinsic(instr)->def;
   case nir_instr_type_tex:
      return &nir_instr_as_tex(instr)->def;
   case nir_instr_type_debug_info:
      return &nir_instr_as_debug_info(instr)->def;
   default:
      unreachable("We never ask for any of these");
   }
}

static bool
cmp_func(const void *data1, const void *data2)
{
   return nir_instrs_equal(data1, data2);
}

struct set *
nir_instr_set_create(void *mem_ctx)
{
   return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
}

void
nir_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}
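/* Adds \p instr to the set if no equivalent instruction is present. If an
 * equivalent instruction \c match is already in the set and cond_function
 * (when non-NULL) approves of the pair, all uses of instr's def are
 * rewritten to match's def and \c match is returned. Otherwise NULL is
 * returned and the set ends up containing \p instr.
 */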
nir_instr *
nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr,
                             bool (*cond_function)(const nir_instr *a,
                                                   const nir_instr *b))
{
   if (!instr_can_rewrite(instr))
      return NULL;

   struct set_entry *e = _mesa_set_search_or_add(instr_set, instr, NULL);
   nir_instr *match = (nir_instr *)e->key;
   if (match == instr)
      return NULL;

   if (!cond_function || cond_function(match, instr)) {
      /* rewrite instruction if condition is matched */
      nir_def *def = nir_instr_get_def_def(instr);
      nir_def *new_def = nir_instr_get_def_def(match);

      /* It's safe to replace an exact instruction with an inexact one as
       * long as we make it exact. If we got here, the two instructions are
       * exactly identical in every other way so, once we've set the exact
       * bit, they are the same.
       */
      if (instr->type == nir_instr_type_alu) {
         nir_instr_as_alu(match)->exact |= nir_instr_as_alu(instr)->exact;
         nir_instr_as_alu(match)->fp_fast_math |= nir_instr_as_alu(instr)->fp_fast_math;
      }

      nir_def_rewrite_uses(def, new_def);

      return match;
   } else {
      /* otherwise, replace hashed instruction */
      e->key = instr;
      return NULL;
   }
}

void
nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
{
   if (!instr_can_rewrite(instr))
      return;

   struct set_entry *entry = _mesa_set_search(instr_set, instr);
   if (entry)
      _mesa_set_remove(instr_set, entry);
}
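/* Usage sketch (illustrative only; real passes also walk the dominance tree
 * and remove the now-dead duplicate instructions):
 *
 *    struct set *set = nir_instr_set_create(NULL);
 *    nir_foreach_instr_safe(instr, block) {
 *       if (nir_instr_set_add_or_rewrite(set, instr, NULL))
 *          nir_instr_remove(instr);
 *    }
 *    nir_instr_set_destroy(set);
 */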