1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <gtest/gtest.h> 25 #include "brw_disasm_info.h" 26 #include "brw_eu.h" 27 #include "brw_eu_defines.h" 28 #include "util/bitset.h" 29 #include "util/ralloc.h" 30 31 static const struct intel_gfx_info { 32 const char *name; 33 } gfx_names[] = { 34 { "skl", }, 35 { "bxt", }, 36 { "kbl", }, 37 { "aml", }, 38 { "glk", }, 39 { "cfl", }, 40 { "whl", }, 41 { "cml", }, 42 { "icl", }, 43 { "ehl", }, 44 { "jsl", }, 45 { "tgl", }, 46 { "rkl", }, 47 { "dg1", }, 48 { "adl", }, 49 { "sg1", }, 50 { "rpl", }, 51 { "dg2", }, 52 { "mtl", }, 53 }; 54 55 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> { 56 virtual void SetUp(); 57 58 public: 59 validation_test(); 60 virtual ~validation_test(); 61 62 struct brw_isa_info isa; 63 struct brw_codegen *p; 64 struct intel_device_info devinfo; 65 }; 66 validation_test()67 validation_test::validation_test() 68 { 69 p = rzalloc(NULL, struct brw_codegen); 70 memset(&devinfo, 0, sizeof(devinfo)); 71 } 72 ~validation_test()73 validation_test::~validation_test() 74 { 75 ralloc_free(p); 76 } 77 SetUp()78 void validation_test::SetUp() 79 { 80 struct intel_gfx_info info = GetParam(); 81 int devid = intel_device_name_to_pci_device_id(info.name); 82 83 intel_get_device_info_from_pci_id(devid, &devinfo); 84 85 brw_init_isa_info(&isa, &devinfo); 86 87 brw_init_codegen(&isa, p, p); 88 } 89 90 struct gfx_name { 91 template <class ParamType> 92 std::string operator ()gfx_name93 operator()(const ::testing::TestParamInfo<ParamType>& info) const { 94 return info.param.name; 95 } 96 }; 97 98 INSTANTIATE_TEST_SUITE_P( 99 eu_assembly, validation_test, 100 ::testing::ValuesIn(gfx_names), 101 gfx_name() 102 ); 103 104 static bool validate(struct brw_codegen * p)105 validate(struct brw_codegen *p) 106 { 107 const bool print = getenv("TEST_DEBUG"); 108 struct disasm_info *disasm = disasm_initialize(p->isa, NULL); 109 110 if (print) { 111 disasm_new_inst_group(disasm, 0); 112 disasm_new_inst_group(disasm, p->next_insn_offset); 113 } 114 115 bool ret = brw_validate_instructions(p->isa, p->store, 0, 116 p->next_insn_offset, disasm); 117 118 if (print) { 119 dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL); 120 } 121 ralloc_free(disasm); 122 123 return ret; 124 } 125 126 #define last_inst (&p->store[p->nr_insn - 1]) 127 #define g0 brw_vec8_grf(0, 0) 128 #define acc0 brw_acc_reg(8) 129 #define null brw_null_reg() 130 #define zero brw_imm_f(0.0f) 131 132 static void clear_instructions(struct brw_codegen * p)133 clear_instructions(struct brw_codegen *p) 134 { 135 p->next_insn_offset = 0; 136 p->nr_insn = 0; 137 } 138 TEST_P(validation_test,sanity)139 TEST_P(validation_test, sanity) 140 { 141 brw_ADD(p, g0, g0, g0); 142 143 EXPECT_TRUE(validate(p)); 144 } 145 TEST_P(validation_test,src0_null_reg)146 TEST_P(validation_test, src0_null_reg) 147 { 148 brw_MOV(p, g0, null); 149 150 EXPECT_FALSE(validate(p)); 151 } 152 TEST_P(validation_test,src1_null_reg)153 TEST_P(validation_test, src1_null_reg) 154 { 155 brw_ADD(p, g0, g0, null); 156 157 EXPECT_FALSE(validate(p)); 158 } 159 TEST_P(validation_test,math_src0_null_reg)160 TEST_P(validation_test, math_src0_null_reg) 161 { 162 gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, null, null); 163 164 EXPECT_FALSE(validate(p)); 165 } 166 TEST_P(validation_test,math_src1_null_reg)167 TEST_P(validation_test, math_src1_null_reg) 168 { 169 gfx6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null); 170 EXPECT_FALSE(validate(p)); 171 } 172 TEST_P(validation_test,opcode46)173 TEST_P(validation_test, opcode46) 174 { 175 /* opcode 46 is "push" on Gen 4 and 5 176 * "fork" on Gen 6 177 * reserved on Gen 7 178 * "goto" on Gfx8+ 179 */ 180 brw_next_insn(p, brw_opcode_decode(&isa, 46)); 181 182 EXPECT_TRUE(validate(p)); 183 } 184 TEST_P(validation_test,invalid_exec_size_encoding)185 TEST_P(validation_test, invalid_exec_size_encoding) 186 { 187 const struct { 188 enum brw_execution_size exec_size; 189 bool expected_result; 190 } test_case[] = { 191 { BRW_EXECUTE_1, true }, 192 { BRW_EXECUTE_2, true }, 193 { BRW_EXECUTE_4, true }, 194 { BRW_EXECUTE_8, true }, 195 { BRW_EXECUTE_16, true }, 196 { BRW_EXECUTE_32, true }, 197 198 { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 1), false }, 199 { (enum brw_execution_size)((int)BRW_EXECUTE_32 + 2), false }, 200 }; 201 202 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) { 203 brw_MOV(p, g0, g0); 204 205 brw_inst_set_exec_size(&devinfo, last_inst, test_case[i].exec_size); 206 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 207 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 208 209 if (test_case[i].exec_size == BRW_EXECUTE_1) { 210 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 211 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1); 212 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 213 } else { 214 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2); 215 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2); 216 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 217 } 218 219 EXPECT_EQ(test_case[i].expected_result, validate(p)); 220 221 clear_instructions(p); 222 } 223 } 224 TEST_P(validation_test,invalid_type_encoding)225 TEST_P(validation_test, invalid_type_encoding) 226 { 227 enum brw_reg_file files[2] = { 228 FIXED_GRF, 229 IMM, 230 }; 231 232 for (unsigned i = 0; i < ARRAY_SIZE(files); i++) { 233 const enum brw_reg_file file = files[i]; 234 const int num_bits = 4; 235 const int num_encodings = 1 << num_bits; 236 237 /* The data types are encoded into <num_bits> bits to be used in hardware 238 * instructions, so keep a record in a bitset the invalid patterns so 239 * they can be verified to be invalid when used. 240 */ 241 BITSET_DECLARE(invalid_encodings, num_encodings); 242 243 const struct { 244 enum brw_reg_type type; 245 bool expected_result; 246 } test_case[] = { 247 { BRW_TYPE_DF, devinfo.has_64bit_float }, 248 { BRW_TYPE_F, true }, 249 { BRW_TYPE_HF, true }, 250 { BRW_TYPE_VF, file == IMM }, 251 { BRW_TYPE_Q, devinfo.has_64bit_int }, 252 { BRW_TYPE_UQ, devinfo.has_64bit_int }, 253 { BRW_TYPE_D, true }, 254 { BRW_TYPE_UD, true }, 255 { BRW_TYPE_W, true }, 256 { BRW_TYPE_UW, true }, 257 { BRW_TYPE_B, file == FIXED_GRF }, 258 { BRW_TYPE_UB, file == FIXED_GRF }, 259 { BRW_TYPE_V, file == IMM }, 260 { BRW_TYPE_UV, file == IMM }, 261 }; 262 263 /* Initially assume all hardware encodings are invalid */ 264 BITSET_ONES(invalid_encodings); 265 266 brw_set_default_exec_size(p, BRW_EXECUTE_4); 267 268 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) { 269 if (test_case[i].expected_result) { 270 unsigned hw_type = brw_type_encode(&devinfo, file, test_case[i].type); 271 if (hw_type != INVALID_HW_REG_TYPE) { 272 /* ... and remove valid encodings from the set */ 273 assert(BITSET_TEST(invalid_encodings, hw_type)); 274 BITSET_CLEAR(invalid_encodings, hw_type); 275 } 276 277 if (file == FIXED_GRF) { 278 struct brw_reg g = retype(g0, test_case[i].type); 279 brw_MOV(p, g, g); 280 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 281 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 282 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 283 } else { 284 enum brw_reg_type t; 285 286 switch (test_case[i].type) { 287 case BRW_TYPE_V: 288 t = BRW_TYPE_W; 289 break; 290 case BRW_TYPE_UV: 291 t = BRW_TYPE_UW; 292 break; 293 case BRW_TYPE_VF: 294 t = BRW_TYPE_F; 295 break; 296 default: 297 t = test_case[i].type; 298 break; 299 } 300 301 struct brw_reg g = retype(g0, t); 302 brw_MOV(p, g, retype(brw_imm_w(0), test_case[i].type)); 303 } 304 305 EXPECT_TRUE(validate(p)); 306 307 clear_instructions(p); 308 } 309 } 310 311 /* The remaining encodings in invalid_encodings do not have a mapping 312 * from BRW_TYPE_* and must be invalid. Verify that invalid 313 * encodings are rejected by the validator. 314 */ 315 int e; 316 BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) { 317 if (file == FIXED_GRF) { 318 brw_MOV(p, g0, g0); 319 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 320 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 321 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 322 } else { 323 brw_MOV(p, g0, brw_imm_w(0)); 324 } 325 brw_inst_set_dst_reg_hw_type(&devinfo, last_inst, e); 326 brw_inst_set_src0_reg_hw_type(&devinfo, last_inst, e); 327 328 EXPECT_FALSE(validate(p)); 329 330 clear_instructions(p); 331 } 332 } 333 } 334 TEST_P(validation_test,invalid_type_encoding_3src_a16)335 TEST_P(validation_test, invalid_type_encoding_3src_a16) 336 { 337 /* 3-src instructions in align16 mode only supported on Gfx6-10 */ 338 if (devinfo.ver < 6 || devinfo.ver > 10) 339 return; 340 341 const int num_bits = devinfo.ver >= 8 ? 3 : 2; 342 const int num_encodings = 1 << num_bits; 343 344 /* The data types are encoded into <num_bits> bits to be used in hardware 345 * instructions, so keep a record in a bitset the invalid patterns so 346 * they can be verified to be invalid when used. 347 */ 348 BITSET_DECLARE(invalid_encodings, num_encodings); 349 350 const struct { 351 enum brw_reg_type type; 352 bool expected_result; 353 } test_case[] = { 354 { BRW_TYPE_DF, devinfo.ver >= 7 }, 355 { BRW_TYPE_F, true }, 356 { BRW_TYPE_HF, devinfo.ver >= 8 }, 357 { BRW_TYPE_D, devinfo.ver >= 7 }, 358 { BRW_TYPE_UD, devinfo.ver >= 7 }, 359 }; 360 361 /* Initially assume all hardware encodings are invalid */ 362 BITSET_ONES(invalid_encodings); 363 364 brw_set_default_access_mode(p, BRW_ALIGN_16); 365 brw_set_default_exec_size(p, BRW_EXECUTE_4); 366 367 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) { 368 if (test_case[i].expected_result) { 369 unsigned hw_type = 370 brw_type_encode_for_3src(&devinfo, test_case[i].type); 371 if (hw_type != INVALID_HW_REG_TYPE) { 372 /* ... and remove valid encodings from the set */ 373 assert(BITSET_TEST(invalid_encodings, hw_type)); 374 BITSET_CLEAR(invalid_encodings, hw_type); 375 } 376 377 struct brw_reg g = retype(g0, test_case[i].type); 378 if (!brw_type_is_int(test_case[i].type)) { 379 brw_MAD(p, g, g, g, g); 380 } else { 381 brw_BFE(p, g, g, g, g); 382 } 383 384 EXPECT_TRUE(validate(p)); 385 386 clear_instructions(p); 387 } 388 } 389 390 /* The remaining encodings in invalid_encodings do not have a mapping 391 * from BRW_TYPE_* and must be invalid. Verify that invalid 392 * encodings are rejected by the validator. 393 */ 394 int e; 395 BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) { 396 for (unsigned i = 0; i < 2; i++) { 397 if (i == 0) { 398 brw_MAD(p, g0, g0, g0, g0); 399 } else { 400 brw_BFE(p, g0, g0, g0, g0); 401 } 402 403 brw_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e); 404 brw_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e); 405 406 EXPECT_FALSE(validate(p)); 407 408 clear_instructions(p); 409 410 if (devinfo.ver == 6) 411 break; 412 } 413 } 414 } 415 TEST_P(validation_test,invalid_type_encoding_3src_a1)416 TEST_P(validation_test, invalid_type_encoding_3src_a1) 417 { 418 /* 3-src instructions in align1 mode only supported on Gfx10+ */ 419 if (devinfo.ver < 10) 420 return; 421 422 const int num_bits = 3 + 1 /* for exec_type */; 423 const int num_encodings = 1 << num_bits; 424 425 /* The data types are encoded into <num_bits> bits to be used in hardware 426 * instructions, so keep a record in a bitset the invalid patterns so 427 * they can be verified to be invalid when used. 428 */ 429 BITSET_DECLARE(invalid_encodings, num_encodings); 430 431 const struct { 432 enum brw_reg_type type; 433 unsigned exec_type; 434 bool expected_result; 435 } test_case[] = { 436 #define E(x) ((unsigned)BRW_ALIGN1_3SRC_EXEC_TYPE_##x) 437 { BRW_TYPE_DF, E(FLOAT), devinfo.has_64bit_float }, 438 { BRW_TYPE_F, E(FLOAT), true }, 439 { BRW_TYPE_HF, E(FLOAT), true }, 440 { BRW_TYPE_D, E(INT), true }, 441 { BRW_TYPE_UD, E(INT), true }, 442 { BRW_TYPE_W, E(INT), true }, 443 { BRW_TYPE_UW, E(INT), true }, 444 445 /* There are no ternary instructions that can operate on B-type sources 446 * on Gfx11-12. Src1/Src2 cannot be B-typed either. 447 */ 448 { BRW_TYPE_B, E(INT), false }, 449 { BRW_TYPE_UB, E(INT), false }, 450 }; 451 452 /* Initially assume all hardware encodings are invalid */ 453 BITSET_ONES(invalid_encodings); 454 455 brw_set_default_access_mode(p, BRW_ALIGN_1); 456 brw_set_default_exec_size(p, BRW_EXECUTE_4); 457 458 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) { 459 if (test_case[i].expected_result) { 460 unsigned hw_type = 461 brw_type_encode_for_3src(&devinfo, test_case[i].type); 462 unsigned hw_exec_type = hw_type | (test_case[i].exec_type << 3); 463 if (hw_type != INVALID_HW_REG_TYPE) { 464 /* ... and remove valid encodings from the set */ 465 assert(BITSET_TEST(invalid_encodings, hw_exec_type)); 466 BITSET_CLEAR(invalid_encodings, hw_exec_type); 467 } 468 469 struct brw_reg g = retype(g0, test_case[i].type); 470 if (!brw_type_is_int(test_case[i].type)) { 471 brw_MAD(p, g, g, g, g); 472 } else { 473 brw_BFE(p, g, g, g, g); 474 } 475 476 EXPECT_TRUE(validate(p)); 477 478 clear_instructions(p); 479 } 480 } 481 482 /* The remaining encodings in invalid_encodings do not have a mapping 483 * from BRW_TYPE_* and must be invalid. Verify that invalid 484 * encodings are rejected by the validator. 485 */ 486 int e; 487 BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) { 488 const unsigned hw_type = e & 0x7; 489 const unsigned exec_type = e >> 3; 490 491 for (unsigned i = 0; i < 2; i++) { 492 if (i == 0) { 493 brw_MAD(p, g0, g0, g0, g0); 494 brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT); 495 } else { 496 brw_CSEL(p, g0, g0, g0, g0); 497 brw_inst_set_3src_cond_modifier(&devinfo, last_inst, BRW_CONDITIONAL_NZ); 498 brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_INT); 499 } 500 501 brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, exec_type); 502 brw_inst_set_3src_a1_dst_hw_type (&devinfo, last_inst, hw_type); 503 brw_inst_set_3src_a1_src0_hw_type(&devinfo, last_inst, hw_type); 504 brw_inst_set_3src_a1_src1_hw_type(&devinfo, last_inst, hw_type); 505 brw_inst_set_3src_a1_src2_hw_type(&devinfo, last_inst, hw_type); 506 507 EXPECT_FALSE(validate(p)); 508 509 clear_instructions(p); 510 } 511 } 512 } 513 514 TEST_P(validation_test, 3src_inst_access_mode) 515 { 516 /* 3-src instructions only supported on Gfx6+ */ 517 if (devinfo.ver < 6) 518 return; 519 520 /* No access mode bit on Gfx12+ */ 521 if (devinfo.ver >= 12) 522 return; 523 524 const struct { 525 unsigned mode; 526 bool expected_result; 527 } test_case[] = { 528 { BRW_ALIGN_1, devinfo.ver >= 10 }, 529 { BRW_ALIGN_16, devinfo.ver <= 10 }, 530 }; 531 532 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) { 533 if (devinfo.ver < 10) 534 brw_set_default_access_mode(p, BRW_ALIGN_16); 535 536 brw_MAD(p, g0, g0, g0, g0); 537 brw_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode); 538 539 EXPECT_EQ(test_case[i].expected_result, validate(p)); 540 541 clear_instructions(p); 542 } 543 } 544 545 /* When the Execution Data Type is wider than the destination data type, the 546 * destination must [...] specify a HorzStride equal to the ratio in sizes of 547 * the two data types. 548 */ TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)549 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size) 550 { 551 brw_ADD(p, g0, g0, g0); 552 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 553 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 554 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 555 556 EXPECT_FALSE(validate(p)); 557 558 clear_instructions(p); 559 560 brw_ADD(p, g0, g0, g0); 561 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 562 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 563 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 564 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 565 566 EXPECT_TRUE(validate(p)); 567 } 568 569 /* When the Execution Data Type is wider than the destination data type, the 570 * destination must be aligned as required by the wider execution data type 571 * [...] 572 */ TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)573 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size) 574 { 575 brw_ADD(p, g0, g0, g0); 576 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2); 577 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 578 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 579 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 580 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 581 582 EXPECT_FALSE(validate(p)); 583 584 clear_instructions(p); 585 586 brw_ADD(p, g0, g0, g0); 587 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4); 588 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8); 589 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 590 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 591 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 592 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 593 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 594 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 595 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 596 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 597 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 598 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 599 600 EXPECT_TRUE(validate(p)); 601 } 602 603 /* ExecSize must be greater than or equal to Width. */ TEST_P(validation_test,exec_size_less_than_width)604 TEST_P(validation_test, exec_size_less_than_width) 605 { 606 brw_ADD(p, g0, g0, g0); 607 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_16); 608 609 EXPECT_FALSE(validate(p)); 610 611 clear_instructions(p); 612 613 brw_ADD(p, g0, g0, g0); 614 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_16); 615 616 EXPECT_FALSE(validate(p)); 617 } 618 619 /* If ExecSize = Width and HorzStride ≠ 0, 620 * VertStride must be set to Width * HorzStride. 621 */ TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)622 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride) 623 { 624 brw_ADD(p, g0, g0, g0); 625 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 626 627 EXPECT_FALSE(validate(p)); 628 629 clear_instructions(p); 630 631 brw_ADD(p, g0, g0, g0); 632 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 633 634 EXPECT_FALSE(validate(p)); 635 } 636 637 /* If Width = 1, HorzStride must be 0 regardless of the values 638 * of ExecSize and VertStride. 639 */ TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)640 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1) 641 { 642 brw_ADD(p, g0, g0, g0); 643 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 644 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1); 645 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 646 647 EXPECT_FALSE(validate(p)); 648 649 clear_instructions(p); 650 651 brw_ADD(p, g0, g0, g0); 652 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 653 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1); 654 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 655 656 EXPECT_FALSE(validate(p)); 657 } 658 659 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */ TEST_P(validation_test,scalar_region_must_be_0_1_0)660 TEST_P(validation_test, scalar_region_must_be_0_1_0) 661 { 662 struct brw_reg g0_0 = brw_vec1_grf(0, 0); 663 664 brw_ADD(p, g0, g0, g0_0); 665 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1); 666 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1); 667 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1); 668 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 669 670 EXPECT_FALSE(validate(p)); 671 672 clear_instructions(p); 673 674 brw_ADD(p, g0, g0_0, g0); 675 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1); 676 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1); 677 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1); 678 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 679 680 EXPECT_FALSE(validate(p)); 681 } 682 683 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value 684 * of ExecSize. 685 */ TEST_P(validation_test,zero_stride_implies_0_1_0)686 TEST_P(validation_test, zero_stride_implies_0_1_0) 687 { 688 brw_ADD(p, g0, g0, g0); 689 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 690 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2); 691 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 692 693 EXPECT_FALSE(validate(p)); 694 695 clear_instructions(p); 696 697 brw_ADD(p, g0, g0, g0); 698 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 699 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2); 700 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 701 702 EXPECT_FALSE(validate(p)); 703 } 704 705 /* Dst.HorzStride must not be 0. */ TEST_P(validation_test,dst_horizontal_stride_0)706 TEST_P(validation_test, dst_horizontal_stride_0) 707 { 708 brw_ADD(p, g0, g0, g0); 709 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 710 711 EXPECT_FALSE(validate(p)); 712 713 clear_instructions(p); 714 715 /* Align16 does not exist on Gfx11+ */ 716 if (devinfo.ver >= 11) 717 return; 718 719 brw_set_default_access_mode(p, BRW_ALIGN_16); 720 721 brw_ADD(p, g0, g0, g0); 722 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 723 724 EXPECT_FALSE(validate(p)); 725 } 726 727 /* VertStride must be used to cross FIXED_GRF register boundaries. This rule implies 728 * that elements within a 'Width' cannot cross FIXED_GRF boundaries. 729 */ TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)730 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width) 731 { 732 brw_ADD(p, g0, g0, g0); 733 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4); 734 735 EXPECT_FALSE(validate(p)); 736 737 clear_instructions(p); 738 739 brw_ADD(p, g0, g0, g0); 740 brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4); 741 742 EXPECT_FALSE(validate(p)); 743 744 clear_instructions(p); 745 746 brw_ADD(p, g0, g0, g0); 747 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 748 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 749 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 750 751 EXPECT_FALSE(validate(p)); 752 753 clear_instructions(p); 754 755 brw_ADD(p, g0, g0, g0); 756 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 757 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 758 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 759 760 EXPECT_FALSE(validate(p)); 761 } 762 763 /* Destination Horizontal must be 1 in Align16 */ TEST_P(validation_test,dst_hstride_on_align16_must_be_1)764 TEST_P(validation_test, dst_hstride_on_align16_must_be_1) 765 { 766 /* Align16 does not exist on Gfx11+ */ 767 if (devinfo.ver >= 11) 768 return; 769 770 brw_set_default_access_mode(p, BRW_ALIGN_16); 771 772 brw_ADD(p, g0, g0, g0); 773 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 774 775 EXPECT_FALSE(validate(p)); 776 777 clear_instructions(p); 778 779 brw_ADD(p, g0, g0, g0); 780 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 781 782 EXPECT_TRUE(validate(p)); 783 } 784 785 /* VertStride must be 0 or 4 in Align16 */ TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)786 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4) 787 { 788 /* Align16 does not exist on Gfx11+ */ 789 if (devinfo.ver >= 11) 790 return; 791 792 const struct { 793 enum brw_vertical_stride vstride; 794 bool expected_result; 795 } vstride[] = { 796 { BRW_VERTICAL_STRIDE_0, true }, 797 { BRW_VERTICAL_STRIDE_1, false }, 798 { BRW_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 }, 799 { BRW_VERTICAL_STRIDE_4, true }, 800 { BRW_VERTICAL_STRIDE_8, false }, 801 { BRW_VERTICAL_STRIDE_16, false }, 802 { BRW_VERTICAL_STRIDE_32, false }, 803 { BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, false }, 804 }; 805 806 brw_set_default_access_mode(p, BRW_ALIGN_16); 807 808 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) { 809 brw_ADD(p, g0, g0, g0); 810 brw_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride); 811 812 EXPECT_EQ(vstride[i].expected_result, validate(p)); 813 814 clear_instructions(p); 815 } 816 817 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) { 818 brw_ADD(p, g0, g0, g0); 819 brw_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride); 820 821 EXPECT_EQ(vstride[i].expected_result, validate(p)); 822 823 clear_instructions(p); 824 } 825 } 826 827 /* In Direct Addressing mode, a source cannot span more than 2 adjacent FIXED_GRF 828 * registers. 829 */ TEST_P(validation_test,source_cannot_span_more_than_2_registers)830 TEST_P(validation_test, source_cannot_span_more_than_2_registers) 831 { 832 brw_ADD(p, g0, g0, g0); 833 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32); 834 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 835 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 836 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 837 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 838 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8); 839 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 840 841 EXPECT_FALSE(validate(p)); 842 843 clear_instructions(p); 844 845 brw_ADD(p, g0, g0, g0); 846 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 847 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 848 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 849 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 850 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 851 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8); 852 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 853 brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2); 854 855 EXPECT_TRUE(validate(p)); 856 857 clear_instructions(p); 858 859 brw_ADD(p, g0, g0, g0); 860 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 861 862 EXPECT_TRUE(validate(p)); 863 } 864 865 /* A destination cannot span more than 2 adjacent FIXED_GRF registers. */ TEST_P(validation_test,destination_cannot_span_more_than_2_registers)866 TEST_P(validation_test, destination_cannot_span_more_than_2_registers) 867 { 868 brw_ADD(p, g0, g0, g0); 869 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32); 870 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 871 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 872 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 873 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 874 875 EXPECT_FALSE(validate(p)); 876 877 clear_instructions(p); 878 879 brw_ADD(p, g0, g0, g0); 880 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_8); 881 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6); 882 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4); 883 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 884 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 885 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 886 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 887 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 888 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 889 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 890 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 891 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 892 893 EXPECT_TRUE(validate(p)); 894 } 895 TEST_P(validation_test,src_region_spans_two_regs_dst_region_spans_one)896 TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one) 897 { 898 /* Writes to dest are to the lower OWord */ 899 brw_ADD(p, g0, g0, g0); 900 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 901 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 902 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 903 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 904 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 905 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 906 907 EXPECT_TRUE(validate(p)); 908 909 clear_instructions(p); 910 911 /* Writes to dest are to the upper OWord */ 912 brw_ADD(p, g0, g0, g0); 913 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16); 914 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 915 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 916 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 917 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 918 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 919 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 920 921 EXPECT_TRUE(validate(p)); 922 923 clear_instructions(p); 924 925 /* Writes to dest are evenly split between OWords */ 926 brw_ADD(p, g0, g0, g0); 927 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 928 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 929 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 930 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 931 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 932 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8); 933 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 934 935 EXPECT_TRUE(validate(p)); 936 937 clear_instructions(p); 938 939 /* Writes to dest are uneven between OWords */ 940 brw_ADD(p, g0, g0, g0); 941 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4); 942 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 10); 943 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 944 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 945 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 946 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 947 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 948 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 949 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16); 950 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2); 951 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 952 953 if (devinfo.ver >= 9) { 954 EXPECT_TRUE(validate(p)); 955 } else { 956 EXPECT_FALSE(validate(p)); 957 } 958 } 959 TEST_P(validation_test,dst_elements_must_be_evenly_split_between_registers)960 TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers) 961 { 962 brw_ADD(p, g0, g0, g0); 963 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4); 964 965 if (devinfo.ver >= 9 && devinfo.verx10 < 125) { 966 EXPECT_TRUE(validate(p)); 967 } else { 968 EXPECT_FALSE(validate(p)); 969 } 970 971 clear_instructions(p); 972 973 brw_ADD(p, g0, g0, g0); 974 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 975 976 EXPECT_TRUE(validate(p)); 977 978 clear_instructions(p); 979 980 if (devinfo.ver >= 6) { 981 gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null); 982 983 EXPECT_TRUE(validate(p)); 984 985 clear_instructions(p); 986 987 gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null); 988 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4); 989 990 EXPECT_FALSE(validate(p)); 991 } 992 } 993 TEST_P(validation_test,two_src_two_dst_source_offsets_must_be_same)994 TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same) 995 { 996 brw_ADD(p, g0, g0, g0); 997 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4); 998 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4); 999 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16); 1000 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2); 1001 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1); 1002 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 1003 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1004 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 1005 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1006 1007 if (devinfo.ver <= 7 || devinfo.verx10 >= 125) { 1008 EXPECT_FALSE(validate(p)); 1009 } else { 1010 EXPECT_TRUE(validate(p)); 1011 } 1012 1013 clear_instructions(p); 1014 1015 brw_ADD(p, g0, g0, g0); 1016 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4); 1017 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4); 1018 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1019 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1); 1020 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 1021 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_8); 1022 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2); 1023 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1024 1025 if (devinfo.verx10 >= 125) 1026 EXPECT_FALSE(validate(p)); 1027 else 1028 EXPECT_TRUE(validate(p)); 1029 } 1030 TEST_P(validation_test,two_src_two_dst_each_dst_must_be_derived_from_one_src)1031 TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src) 1032 { 1033 brw_MOV(p, g0, g0); 1034 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1035 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1036 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 1037 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1038 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8); 1039 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1040 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 1041 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1042 1043 if (devinfo.ver <= 7) { 1044 EXPECT_FALSE(validate(p)); 1045 } else { 1046 EXPECT_TRUE(validate(p)); 1047 } 1048 1049 clear_instructions(p); 1050 1051 brw_MOV(p, g0, g0); 1052 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16); 1053 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8); 1054 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2); 1055 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2); 1056 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1057 1058 if (devinfo.ver <= 7 || devinfo.verx10 >= 125) { 1059 EXPECT_FALSE(validate(p)); 1060 } else { 1061 EXPECT_TRUE(validate(p)); 1062 } 1063 } 1064 TEST_P(validation_test,one_src_two_dst)1065 TEST_P(validation_test, one_src_two_dst) 1066 { 1067 struct brw_reg g0_0 = brw_vec1_grf(0, 0); 1068 1069 brw_ADD(p, g0, g0_0, g0_0); 1070 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1071 1072 EXPECT_TRUE(validate(p)); 1073 1074 clear_instructions(p); 1075 1076 brw_ADD(p, g0, g0, g0); 1077 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1078 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 1079 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1080 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 1081 1082 EXPECT_TRUE(validate(p)); 1083 1084 clear_instructions(p); 1085 1086 brw_ADD(p, g0, g0, g0); 1087 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1088 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 1089 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 1090 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1091 1092 if (devinfo.ver >= 8) { 1093 EXPECT_TRUE(validate(p)); 1094 } else { 1095 EXPECT_FALSE(validate(p)); 1096 } 1097 1098 clear_instructions(p); 1099 1100 brw_ADD(p, g0, g0, g0); 1101 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1102 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D); 1103 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1104 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1105 1106 if (devinfo.ver >= 8) { 1107 EXPECT_TRUE(validate(p)); 1108 } else { 1109 EXPECT_FALSE(validate(p)); 1110 } 1111 1112 clear_instructions(p); 1113 1114 brw_ADD(p, g0, g0, g0); 1115 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1116 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 1117 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1118 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1119 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1120 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 1121 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1); 1122 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 1123 1124 if (devinfo.ver >= 8) { 1125 EXPECT_TRUE(validate(p)); 1126 } else { 1127 EXPECT_FALSE(validate(p)); 1128 } 1129 1130 clear_instructions(p); 1131 1132 brw_ADD(p, g0, g0, g0); 1133 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16); 1134 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 1135 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1136 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1137 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0); 1138 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1); 1139 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0); 1140 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W); 1141 1142 if (devinfo.ver >= 8) { 1143 EXPECT_TRUE(validate(p)); 1144 } else { 1145 EXPECT_FALSE(validate(p)); 1146 } 1147 } 1148 TEST_P(validation_test,packed_byte_destination)1149 TEST_P(validation_test, packed_byte_destination) 1150 { 1151 static const struct { 1152 enum brw_reg_type dst_type; 1153 enum brw_reg_type src_type; 1154 bool neg, abs, sat; 1155 bool expected_result; 1156 } move[] = { 1157 { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 0, true }, 1158 { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 0, true }, 1159 { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 0, true }, 1160 { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 0, true }, 1161 1162 { BRW_TYPE_UB, BRW_TYPE_UB, 1, 0, 0, false }, 1163 { BRW_TYPE_B , BRW_TYPE_B , 1, 0, 0, false }, 1164 { BRW_TYPE_UB, BRW_TYPE_B , 1, 0, 0, false }, 1165 { BRW_TYPE_B , BRW_TYPE_UB, 1, 0, 0, false }, 1166 1167 { BRW_TYPE_UB, BRW_TYPE_UB, 0, 1, 0, false }, 1168 { BRW_TYPE_B , BRW_TYPE_B , 0, 1, 0, false }, 1169 { BRW_TYPE_UB, BRW_TYPE_B , 0, 1, 0, false }, 1170 { BRW_TYPE_B , BRW_TYPE_UB, 0, 1, 0, false }, 1171 1172 { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 1, false }, 1173 { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 1, false }, 1174 { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 1, false }, 1175 { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 1, false }, 1176 1177 { BRW_TYPE_UB, BRW_TYPE_UW, 0, 0, 0, false }, 1178 { BRW_TYPE_B , BRW_TYPE_W , 0, 0, 0, false }, 1179 { BRW_TYPE_UB, BRW_TYPE_UD, 0, 0, 0, false }, 1180 { BRW_TYPE_B , BRW_TYPE_D , 0, 0, 0, false }, 1181 }; 1182 1183 for (unsigned i = 0; i < ARRAY_SIZE(move); i++) { 1184 brw_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type)); 1185 brw_inst_set_src0_negate(&devinfo, last_inst, move[i].neg); 1186 brw_inst_set_src0_abs(&devinfo, last_inst, move[i].abs); 1187 brw_inst_set_saturate(&devinfo, last_inst, move[i].sat); 1188 1189 EXPECT_EQ(move[i].expected_result, validate(p)); 1190 1191 clear_instructions(p); 1192 } 1193 1194 brw_SEL(p, retype(g0, BRW_TYPE_UB), 1195 retype(g0, BRW_TYPE_UB), 1196 retype(g0, BRW_TYPE_UB)); 1197 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL); 1198 1199 EXPECT_FALSE(validate(p)); 1200 1201 clear_instructions(p); 1202 1203 brw_SEL(p, retype(g0, BRW_TYPE_B), 1204 retype(g0, BRW_TYPE_B), 1205 retype(g0, BRW_TYPE_B)); 1206 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL); 1207 1208 EXPECT_FALSE(validate(p)); 1209 } 1210 TEST_P(validation_test,byte_destination_relaxed_alignment)1211 TEST_P(validation_test, byte_destination_relaxed_alignment) 1212 { 1213 brw_SEL(p, retype(g0, BRW_TYPE_B), 1214 retype(g0, BRW_TYPE_W), 1215 retype(g0, BRW_TYPE_W)); 1216 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL); 1217 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 1218 1219 EXPECT_TRUE(validate(p)); 1220 1221 clear_instructions(p); 1222 1223 brw_SEL(p, retype(g0, BRW_TYPE_B), 1224 retype(g0, BRW_TYPE_W), 1225 retype(g0, BRW_TYPE_W)); 1226 brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL); 1227 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 1228 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 1); 1229 1230 EXPECT_TRUE(validate(p)); 1231 } 1232 TEST_P(validation_test,byte_64bit_conversion)1233 TEST_P(validation_test, byte_64bit_conversion) 1234 { 1235 static const struct { 1236 enum brw_reg_type dst_type; 1237 enum brw_reg_type src_type; 1238 unsigned dst_stride; 1239 bool expected_result; 1240 } inst[] = { 1241 #define INST(dst_type, src_type, dst_stride, expected_result) \ 1242 { \ 1243 BRW_TYPE_##dst_type, \ 1244 BRW_TYPE_##src_type, \ 1245 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1246 expected_result, \ 1247 } 1248 1249 INST(B, Q, 1, false), 1250 INST(B, UQ, 1, false), 1251 INST(B, DF, 1, false), 1252 INST(UB, Q, 1, false), 1253 INST(UB, UQ, 1, false), 1254 INST(UB, DF, 1, false), 1255 1256 INST(B, Q, 2, false), 1257 INST(B, UQ, 2, false), 1258 INST(B , DF, 2, false), 1259 INST(UB, Q, 2, false), 1260 INST(UB, UQ, 2, false), 1261 INST(UB, DF, 2, false), 1262 1263 INST(B, Q, 4, false), 1264 INST(B, UQ, 4, false), 1265 INST(B, DF, 4, false), 1266 INST(UB, Q, 4, false), 1267 INST(UB, UQ, 4, false), 1268 INST(UB, DF, 4, false), 1269 1270 #undef INST 1271 }; 1272 1273 if (devinfo.ver < 8) 1274 return; 1275 1276 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1277 if (!devinfo.has_64bit_float && 1278 inst[i].src_type == BRW_TYPE_DF) 1279 continue; 1280 1281 if (!devinfo.has_64bit_int && 1282 (inst[i].src_type == BRW_TYPE_Q || 1283 inst[i].src_type == BRW_TYPE_UQ)) 1284 continue; 1285 1286 brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type)); 1287 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1288 EXPECT_EQ(inst[i].expected_result, validate(p)); 1289 1290 clear_instructions(p); 1291 } 1292 } 1293 TEST_P(validation_test,half_float_conversion)1294 TEST_P(validation_test, half_float_conversion) 1295 { 1296 static const struct { 1297 enum brw_reg_type dst_type; 1298 enum brw_reg_type src_type; 1299 unsigned dst_stride; 1300 unsigned dst_subnr; 1301 bool expected_result_gfx9; 1302 bool expected_result_gfx125; 1303 } inst[] = { 1304 #define INST(dst_type, src_type, dst_stride, dst_subnr, \ 1305 expected_result_gfx9, \ 1306 expected_result_gfx125) \ 1307 { \ 1308 BRW_TYPE_##dst_type, \ 1309 BRW_TYPE_##src_type, \ 1310 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1311 dst_subnr, \ 1312 expected_result_gfx9, \ 1313 expected_result_gfx125, \ 1314 } 1315 1316 /* MOV to half-float destination */ 1317 INST(HF, B, 1, 0, false, false), /* 0 */ 1318 INST(HF, W, 1, 0, false, false), 1319 INST(HF, HF, 1, 0, true, true), 1320 INST(HF, HF, 1, 2, true, false), 1321 INST(HF, D, 1, 0, false, false), 1322 INST(HF, F, 1, 0, true, false), 1323 INST(HF, Q, 1, 0, false, false), 1324 INST(HF, B, 2, 0, true, false), 1325 INST(HF, B, 2, 2, false, false), 1326 INST(HF, W, 2, 0, true, false), 1327 INST(HF, W, 2, 2, false, false), /* 10 */ 1328 INST(HF, HF, 2, 0, true, false), 1329 INST(HF, HF, 2, 2, true, false), 1330 INST(HF, D, 2, 0, true, true), 1331 INST(HF, D, 2, 2, false, false), 1332 INST(HF, F, 2, 0, true, true), 1333 INST(HF, F, 2, 2, true, false), 1334 INST(HF, Q, 2, 0, false, false), 1335 INST(HF, DF, 2, 0, false, false), 1336 INST(HF, B, 4, 0, false, false), 1337 INST(HF, W, 4, 0, false, false), /* 20 */ 1338 INST(HF, HF, 4, 0, true, false), 1339 INST(HF, HF, 4, 2, true, false), 1340 INST(HF, D, 4, 0, false, false), 1341 INST(HF, F, 4, 0, false, false), 1342 INST(HF, Q, 4, 0, false, false), 1343 INST(HF, DF, 4, 0, false, false), 1344 1345 /* MOV from half-float source */ 1346 INST( B, HF, 1, 0, false, false), 1347 INST( W, HF, 1, 0, false, false), 1348 INST( D, HF, 1, 0, true, true), 1349 INST( D, HF, 1, 4, true, true), /* 30 */ 1350 INST( F, HF, 1, 0, true, false), 1351 INST( F, HF, 1, 4, true, false), 1352 INST( Q, HF, 1, 0, false, false), 1353 INST(DF, HF, 1, 0, false, false), 1354 INST( B, HF, 2, 0, false, false), 1355 INST( W, HF, 2, 0, true, true), 1356 INST( W, HF, 2, 2, false, false), 1357 INST( D, HF, 2, 0, false, false), 1358 INST( F, HF, 2, 0, true, false), 1359 INST( B, HF, 4, 0, true, true), /* 40 */ 1360 INST( B, HF, 4, 1, false, false), 1361 INST( W, HF, 4, 0, false, false), 1362 1363 #undef INST 1364 }; 1365 1366 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1367 if (!devinfo.has_64bit_float && 1368 (inst[i].dst_type == BRW_TYPE_DF || 1369 inst[i].src_type == BRW_TYPE_DF)) 1370 continue; 1371 1372 if (!devinfo.has_64bit_int && 1373 (inst[i].dst_type == BRW_TYPE_Q || 1374 inst[i].dst_type == BRW_TYPE_UQ || 1375 inst[i].src_type == BRW_TYPE_Q || 1376 inst[i].src_type == BRW_TYPE_UQ)) 1377 continue; 1378 1379 brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type)); 1380 1381 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4); 1382 1383 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1384 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr); 1385 1386 if (inst[i].src_type == BRW_TYPE_B) { 1387 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1388 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2); 1389 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2); 1390 } else { 1391 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1392 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 1393 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1394 } 1395 1396 if (devinfo.verx10 >= 125) { 1397 EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)) << 1398 "Failing test is: " << i; 1399 } else { 1400 EXPECT_EQ(inst[i].expected_result_gfx9, validate(p)) << 1401 "Failing test is: " << i; 1402 } 1403 1404 clear_instructions(p); 1405 } 1406 } 1407 TEST_P(validation_test,mixed_float_source_indirect_addressing)1408 TEST_P(validation_test, mixed_float_source_indirect_addressing) 1409 { 1410 static const struct { 1411 enum brw_reg_type dst_type; 1412 enum brw_reg_type src0_type; 1413 enum brw_reg_type src1_type; 1414 unsigned dst_stride; 1415 bool dst_indirect; 1416 bool src0_indirect; 1417 bool expected_result; 1418 bool gfx125_expected_result; 1419 } inst[] = { 1420 #define INST(dst_type, src0_type, src1_type, \ 1421 dst_stride, dst_indirect, src0_indirect, expected_result, \ 1422 gfx125_expected_result) \ 1423 { \ 1424 BRW_TYPE_##dst_type, \ 1425 BRW_TYPE_##src0_type, \ 1426 BRW_TYPE_##src1_type, \ 1427 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1428 dst_indirect, \ 1429 src0_indirect, \ 1430 expected_result, \ 1431 gfx125_expected_result, \ 1432 } 1433 1434 /* Source and dest are mixed float: indirect src addressing not allowed */ 1435 INST(HF, F, F, 2, false, false, true, true), 1436 INST(HF, F, F, 2, true, false, true, true), 1437 INST(HF, F, F, 2, false, true, false, false), 1438 INST(HF, F, F, 2, true, true, false, false), 1439 INST( F, HF, F, 1, false, false, true, false), 1440 INST( F, HF, F, 1, true, false, true, false), 1441 INST( F, HF, F, 1, false, true, false, false), 1442 INST( F, HF, F, 1, true, true, false, false), 1443 1444 INST(HF, HF, F, 2, false, false, true, false), 1445 INST(HF, HF, F, 2, true, false, true, false), 1446 INST(HF, HF, F, 2, false, true, false, false), 1447 INST(HF, HF, F, 2, true, true, false, false), 1448 INST( F, F, HF, 1, false, false, true, false), 1449 INST( F, F, HF, 1, true, false, true, false), 1450 INST( F, F, HF, 1, false, true, false, false), 1451 INST( F, F, HF, 1, true, true, false, false), 1452 1453 #undef INST 1454 }; 1455 1456 if (devinfo.ver < 8) 1457 return; 1458 1459 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1460 brw_ADD(p, retype(g0, inst[i].dst_type), 1461 retype(g0, inst[i].src0_type), 1462 retype(g0, inst[i].src1_type)); 1463 1464 brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect); 1465 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1466 brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect); 1467 1468 if (devinfo.verx10 >= 125) { 1469 EXPECT_EQ(inst[i].gfx125_expected_result, validate(p)); 1470 } else { 1471 EXPECT_EQ(inst[i].expected_result, validate(p)); 1472 } 1473 1474 clear_instructions(p); 1475 } 1476 } 1477 TEST_P(validation_test,mixed_float_align1_simd16)1478 TEST_P(validation_test, mixed_float_align1_simd16) 1479 { 1480 static const struct { 1481 unsigned exec_size; 1482 enum brw_reg_type dst_type; 1483 enum brw_reg_type src0_type; 1484 enum brw_reg_type src1_type; 1485 unsigned dst_stride; 1486 bool expected_result; 1487 bool gfx125_expected_result; 1488 } inst[] = { 1489 #define INST(exec_size, dst_type, src0_type, src1_type, \ 1490 dst_stride, expected_result, gfx125_expected_result) \ 1491 { \ 1492 BRW_EXECUTE_##exec_size, \ 1493 BRW_TYPE_##dst_type, \ 1494 BRW_TYPE_##src0_type, \ 1495 BRW_TYPE_##src1_type, \ 1496 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1497 expected_result, \ 1498 gfx125_expected_result, \ 1499 } 1500 1501 /* No SIMD16 in mixed mode when destination is packed f16 */ 1502 INST( 8, HF, F, HF, 2, true, false), 1503 INST(16, HF, HF, F, 2, true, false), 1504 INST(16, HF, HF, F, 1, false, false), 1505 INST(16, HF, F, HF, 1, false, false), 1506 1507 /* No SIMD16 in mixed mode when destination is f32 */ 1508 INST( 8, F, HF, F, 1, true, false), 1509 INST( 8, F, F, HF, 1, true, false), 1510 INST(16, F, HF, F, 1, false, false), 1511 INST(16, F, F, HF, 1, false, false), 1512 1513 #undef INST 1514 }; 1515 1516 if (devinfo.ver < 8) 1517 return; 1518 1519 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1520 brw_ADD(p, retype(g0, inst[i].dst_type), 1521 retype(g0, inst[i].src0_type), 1522 retype(g0, inst[i].src1_type)); 1523 1524 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 1525 1526 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1527 1528 if (devinfo.verx10 >= 125) { 1529 EXPECT_EQ(inst[i].gfx125_expected_result, validate(p)); 1530 } else { 1531 EXPECT_EQ(inst[i].expected_result, validate(p)); 1532 } 1533 1534 clear_instructions(p); 1535 } 1536 } 1537 TEST_P(validation_test,mixed_float_align1_packed_fp16_dst_acc_read_offset_0)1538 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0) 1539 { 1540 static const struct { 1541 enum brw_reg_type dst_type; 1542 enum brw_reg_type src0_type; 1543 enum brw_reg_type src1_type; 1544 unsigned dst_stride; 1545 bool read_acc; 1546 unsigned subnr; 1547 bool expected_result_bdw; 1548 bool expected_result_chv_skl; 1549 bool expected_result_gfx125; 1550 } inst[] = { 1551 #define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr, \ 1552 expected_result_bdw, expected_result_chv_skl, \ 1553 expected_result_gfx125) \ 1554 { \ 1555 BRW_TYPE_##dst_type, \ 1556 BRW_TYPE_##src0_type, \ 1557 BRW_TYPE_##src1_type, \ 1558 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1559 read_acc, \ 1560 subnr, \ 1561 expected_result_bdw, \ 1562 expected_result_chv_skl, \ 1563 expected_result_gfx125, \ 1564 } 1565 1566 /* Destination is not packed */ 1567 INST(HF, HF, F, 2, true, 0, true, true, false), 1568 INST(HF, HF, F, 2, true, 2, true, true, false), 1569 INST(HF, HF, F, 2, true, 4, true, true, false), 1570 INST(HF, HF, F, 2, true, 8, true, true, false), 1571 INST(HF, HF, F, 2, true, 16, true, true, false), 1572 1573 /* Destination is packed, we don't read acc */ 1574 INST(HF, HF, F, 1, false, 0, false, true, false), 1575 INST(HF, HF, F, 1, false, 2, false, true, false), 1576 INST(HF, HF, F, 1, false, 4, false, true, false), 1577 INST(HF, HF, F, 1, false, 8, false, true, false), 1578 INST(HF, HF, F, 1, false, 16, false, true, false), 1579 1580 /* Destination is packed, we read acc */ 1581 INST(HF, HF, F, 1, true, 0, false, false, false), 1582 INST(HF, HF, F, 1, true, 2, false, false, false), 1583 INST(HF, HF, F, 1, true, 4, false, false, false), 1584 INST(HF, HF, F, 1, true, 8, false, false, false), 1585 INST(HF, HF, F, 1, true, 16, false, false, false), 1586 1587 #undef INST 1588 }; 1589 1590 if (devinfo.ver < 8) 1591 return; 1592 1593 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1594 brw_ADD(p, retype(g0, inst[i].dst_type), 1595 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type), 1596 retype(g0, inst[i].src1_type)); 1597 1598 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1599 1600 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr); 1601 1602 if (devinfo.verx10 >= 125) 1603 EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)); 1604 else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9) 1605 EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); 1606 else 1607 EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); 1608 1609 clear_instructions(p); 1610 } 1611 } 1612 TEST_P(validation_test,mixed_float_fp16_dest_with_acc)1613 TEST_P(validation_test, mixed_float_fp16_dest_with_acc) 1614 { 1615 static const struct { 1616 unsigned exec_size; 1617 unsigned opcode; 1618 enum brw_reg_type dst_type; 1619 enum brw_reg_type src0_type; 1620 enum brw_reg_type src1_type; 1621 unsigned dst_stride; 1622 bool read_acc; 1623 bool expected_result_bdw; 1624 bool expected_result_chv_skl; 1625 bool expected_result_gfx125; 1626 } inst[] = { 1627 #define INST(exec_size, opcode, dst_type, src0_type, src1_type, \ 1628 dst_stride, read_acc,expected_result_bdw, \ 1629 expected_result_chv_skl, expected_result_gfx125) \ 1630 { \ 1631 BRW_EXECUTE_##exec_size, \ 1632 BRW_OPCODE_##opcode, \ 1633 BRW_TYPE_##dst_type, \ 1634 BRW_TYPE_##src0_type, \ 1635 BRW_TYPE_##src1_type, \ 1636 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1637 read_acc, \ 1638 expected_result_bdw, \ 1639 expected_result_chv_skl, \ 1640 expected_result_gfx125, \ 1641 } 1642 1643 /* Packed fp16 dest with implicit acc needs hstride=2 */ 1644 INST(8, MAC, HF, HF, F, 1, false, false, false, false), 1645 INST(8, MAC, HF, HF, F, 2, false, true, true, false), 1646 INST(8, MAC, HF, F, HF, 1, false, false, false, false), 1647 INST(8, MAC, HF, F, HF, 2, false, true, true, false), 1648 1649 /* Packed fp16 dest with explicit acc needs hstride=2 */ 1650 INST(8, ADD, HF, HF, F, 1, true, false, false, false), 1651 INST(8, ADD, HF, HF, F, 2, true, true, true, false), 1652 INST(8, ADD, HF, F, HF, 1, true, false, false, false), 1653 INST(8, ADD, HF, F, HF, 2, true, true, true, false), 1654 1655 /* If destination is not fp16, restriction doesn't apply */ 1656 INST(8, MAC, F, HF, F, 1, false, true, true, false), 1657 INST(8, MAC, F, HF, F, 2, false, true, true, false), 1658 1659 /* If there is no implicit/explicit acc, restriction doesn't apply */ 1660 INST(8, ADD, HF, HF, F, 1, false, false, true, false), 1661 INST(8, ADD, HF, HF, F, 2, false, true, true, false), 1662 INST(8, ADD, HF, F, HF, 1, false, false, true, false), 1663 INST(8, ADD, HF, F, HF, 2, false, true, true, false), 1664 INST(8, ADD, F, HF, F, 1, false, true, true, false), 1665 INST(8, ADD, F, HF, F, 2, false, true, true, false), 1666 1667 #undef INST 1668 }; 1669 1670 if (devinfo.ver < 8) 1671 return; 1672 1673 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1674 if (inst[i].opcode == BRW_OPCODE_MAC) { 1675 brw_MAC(p, retype(g0, inst[i].dst_type), 1676 retype(g0, inst[i].src0_type), 1677 retype(g0, inst[i].src1_type)); 1678 } else { 1679 assert(inst[i].opcode == BRW_OPCODE_ADD); 1680 brw_ADD(p, retype(g0, inst[i].dst_type), 1681 retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type), 1682 retype(g0, inst[i].src1_type)); 1683 } 1684 1685 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 1686 1687 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1688 1689 if (devinfo.verx10 >= 125) 1690 EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)); 1691 else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9) 1692 EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); 1693 else 1694 EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); 1695 1696 clear_instructions(p); 1697 } 1698 } 1699 TEST_P(validation_test,mixed_float_align1_math_strided_fp16_inputs)1700 TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs) 1701 { 1702 static const struct { 1703 enum brw_reg_type dst_type; 1704 enum brw_reg_type src0_type; 1705 enum brw_reg_type src1_type; 1706 unsigned dst_stride; 1707 unsigned src0_stride; 1708 unsigned src1_stride; 1709 bool expected_result; 1710 bool expected_result_gfx125; 1711 } inst[] = { 1712 #define INST(dst_type, src0_type, src1_type, \ 1713 dst_stride, src0_stride, src1_stride, expected_result, \ 1714 expected_result_125) \ 1715 { \ 1716 BRW_TYPE_##dst_type, \ 1717 BRW_TYPE_##src0_type, \ 1718 BRW_TYPE_##src1_type, \ 1719 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1720 BRW_HORIZONTAL_STRIDE_##src0_stride, \ 1721 BRW_HORIZONTAL_STRIDE_##src1_stride, \ 1722 expected_result, \ 1723 expected_result_125, \ 1724 } 1725 1726 INST(HF, HF, F, 2, 2, 1, true, false), 1727 INST(HF, F, HF, 2, 1, 2, true, false), 1728 INST(HF, F, HF, 1, 1, 2, true, false), 1729 INST(HF, F, HF, 2, 1, 1, false, false), 1730 INST(HF, HF, F, 2, 1, 1, false, false), 1731 INST(HF, HF, F, 1, 1, 1, false, false), 1732 INST(HF, HF, F, 2, 1, 1, false, false), 1733 INST( F, HF, F, 1, 1, 1, false, false), 1734 INST( F, F, HF, 1, 1, 2, true, false), 1735 INST( F, HF, HF, 1, 2, 1, false, false), 1736 INST( F, HF, HF, 1, 2, 2, true, false), 1737 1738 #undef INST 1739 }; 1740 1741 /* No half-float math in gfx8 */ 1742 if (devinfo.ver < 9) 1743 return; 1744 1745 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1746 gfx6_math(p, retype(g0, inst[i].dst_type), 1747 BRW_MATH_FUNCTION_POW, 1748 retype(g0, inst[i].src0_type), 1749 retype(g0, inst[i].src1_type)); 1750 1751 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1752 1753 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1754 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 1755 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride); 1756 1757 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1758 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 1759 brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride); 1760 1761 if (devinfo.verx10 >= 125) 1762 EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)); 1763 else 1764 EXPECT_EQ(inst[i].expected_result, validate(p)); 1765 1766 clear_instructions(p); 1767 } 1768 } 1769 TEST_P(validation_test,mixed_float_align1_packed_fp16_dst)1770 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst) 1771 { 1772 static const struct { 1773 unsigned exec_size; 1774 enum brw_reg_type dst_type; 1775 enum brw_reg_type src0_type; 1776 enum brw_reg_type src1_type; 1777 unsigned dst_stride; 1778 unsigned dst_subnr; 1779 bool expected_result_bdw; 1780 bool expected_result_chv_skl; 1781 bool expected_result_gfx125; 1782 } inst[] = { 1783 #define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \ 1784 expected_result_bdw, expected_result_chv_skl, \ 1785 expected_result_gfx125) \ 1786 { \ 1787 BRW_EXECUTE_##exec_size, \ 1788 BRW_TYPE_##dst_type, \ 1789 BRW_TYPE_##src0_type, \ 1790 BRW_TYPE_##src1_type, \ 1791 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 1792 dst_subnr, \ 1793 expected_result_bdw, \ 1794 expected_result_chv_skl, \ 1795 expected_result_gfx125 \ 1796 } 1797 1798 /* SIMD8 packed fp16 dst won't cross oword boundaries if region is 1799 * oword-aligned 1800 */ 1801 INST( 8, HF, HF, F, 1, 0, false, true, false), 1802 INST( 8, HF, HF, F, 1, 2, false, false, false), 1803 INST( 8, HF, HF, F, 1, 4, false, false, false), 1804 INST( 8, HF, HF, F, 1, 8, false, false, false), 1805 INST( 8, HF, HF, F, 1, 16, false, true, false), 1806 1807 /* SIMD16 packed fp16 always crosses oword boundaries */ 1808 INST(16, HF, HF, F, 1, 0, false, false, false), 1809 INST(16, HF, HF, F, 1, 2, false, false, false), 1810 INST(16, HF, HF, F, 1, 4, false, false, false), 1811 INST(16, HF, HF, F, 1, 8, false, false, false), 1812 INST(16, HF, HF, F, 1, 16, false, false, false), 1813 1814 /* If destination is not packed (or not fp16) we can cross oword 1815 * boundaries 1816 */ 1817 INST( 8, HF, HF, F, 2, 0, true, true, false), 1818 INST( 8, F, HF, F, 1, 0, true, true, false), 1819 1820 #undef INST 1821 }; 1822 1823 if (devinfo.ver < 8) 1824 return; 1825 1826 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1827 brw_ADD(p, retype(g0, inst[i].dst_type), 1828 retype(g0, inst[i].src0_type), 1829 retype(g0, inst[i].src1_type)); 1830 1831 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 1832 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr); 1833 1834 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1835 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); 1836 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1837 1838 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1839 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); 1840 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 1841 1842 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 1843 1844 if (devinfo.verx10 >= 125) 1845 EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)); 1846 else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9) 1847 EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); 1848 else 1849 EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); 1850 1851 clear_instructions(p); 1852 } 1853 } 1854 TEST_P(validation_test,mixed_float_align16_packed_data)1855 TEST_P(validation_test, mixed_float_align16_packed_data) 1856 { 1857 static const struct { 1858 enum brw_reg_type dst_type; 1859 enum brw_reg_type src0_type; 1860 enum brw_reg_type src1_type; 1861 unsigned src0_vstride; 1862 unsigned src1_vstride; 1863 bool expected_result; 1864 } inst[] = { 1865 #define INST(dst_type, src0_type, src1_type, \ 1866 src0_vstride, src1_vstride, expected_result) \ 1867 { \ 1868 BRW_TYPE_##dst_type, \ 1869 BRW_TYPE_##src0_type, \ 1870 BRW_TYPE_##src1_type, \ 1871 BRW_VERTICAL_STRIDE_##src0_vstride, \ 1872 BRW_VERTICAL_STRIDE_##src1_vstride, \ 1873 expected_result, \ 1874 } 1875 1876 /* We only test with F destination because there is a restriction 1877 * by which F->HF conversions need to be DWord aligned but Align16 also 1878 * requires that destination horizontal stride is 1. 1879 */ 1880 INST(F, F, HF, 4, 4, true), 1881 INST(F, F, HF, 2, 4, false), 1882 INST(F, F, HF, 4, 2, false), 1883 INST(F, F, HF, 0, 4, false), 1884 INST(F, F, HF, 4, 0, false), 1885 INST(F, HF, F, 4, 4, true), 1886 INST(F, HF, F, 4, 2, false), 1887 INST(F, HF, F, 2, 4, false), 1888 INST(F, HF, F, 0, 4, false), 1889 INST(F, HF, F, 4, 0, false), 1890 1891 #undef INST 1892 }; 1893 1894 if (devinfo.ver < 8 || devinfo.ver >= 11) 1895 return; 1896 1897 brw_set_default_access_mode(p, BRW_ALIGN_16); 1898 1899 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1900 brw_ADD(p, retype(g0, inst[i].dst_type), 1901 retype(g0, inst[i].src0_type), 1902 retype(g0, inst[i].src1_type)); 1903 1904 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride); 1905 brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride); 1906 1907 EXPECT_EQ(inst[i].expected_result, validate(p)); 1908 1909 clear_instructions(p); 1910 } 1911 } 1912 TEST_P(validation_test,mixed_float_align16_no_simd16)1913 TEST_P(validation_test, mixed_float_align16_no_simd16) 1914 { 1915 static const struct { 1916 unsigned exec_size; 1917 enum brw_reg_type dst_type; 1918 enum brw_reg_type src0_type; 1919 enum brw_reg_type src1_type; 1920 bool expected_result; 1921 } inst[] = { 1922 #define INST(exec_size, dst_type, src0_type, src1_type, expected_result) \ 1923 { \ 1924 BRW_EXECUTE_##exec_size, \ 1925 BRW_TYPE_##dst_type, \ 1926 BRW_TYPE_##src0_type, \ 1927 BRW_TYPE_##src1_type, \ 1928 expected_result, \ 1929 } 1930 1931 /* We only test with F destination because there is a restriction 1932 * by which F->HF conversions need to be DWord aligned but Align16 also 1933 * requires that destination horizontal stride is 1. 1934 */ 1935 INST( 8, F, F, HF, true), 1936 INST( 8, F, HF, F, true), 1937 INST( 8, F, F, HF, true), 1938 INST(16, F, F, HF, false), 1939 INST(16, F, HF, F, false), 1940 INST(16, F, F, HF, false), 1941 1942 #undef INST 1943 }; 1944 1945 if (devinfo.ver < 8 || devinfo.ver >= 11) 1946 return; 1947 1948 brw_set_default_access_mode(p, BRW_ALIGN_16); 1949 1950 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 1951 brw_ADD(p, retype(g0, inst[i].dst_type), 1952 retype(g0, inst[i].src0_type), 1953 retype(g0, inst[i].src1_type)); 1954 1955 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 1956 1957 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1958 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 1959 1960 EXPECT_EQ(inst[i].expected_result, validate(p)); 1961 1962 clear_instructions(p); 1963 } 1964 } 1965 TEST_P(validation_test,mixed_float_align16_no_acc_read)1966 TEST_P(validation_test, mixed_float_align16_no_acc_read) 1967 { 1968 static const struct { 1969 enum brw_reg_type dst_type; 1970 enum brw_reg_type src0_type; 1971 enum brw_reg_type src1_type; 1972 bool read_acc; 1973 bool expected_result; 1974 } inst[] = { 1975 #define INST(dst_type, src0_type, src1_type, read_acc, expected_result) \ 1976 { \ 1977 BRW_TYPE_##dst_type, \ 1978 BRW_TYPE_##src0_type, \ 1979 BRW_TYPE_##src1_type, \ 1980 read_acc, \ 1981 expected_result, \ 1982 } 1983 1984 /* We only test with F destination because there is a restriction 1985 * by which F->HF conversions need to be DWord aligned but Align16 also 1986 * requires that destination horizontal stride is 1. 1987 */ 1988 INST( F, F, HF, false, true), 1989 INST( F, F, HF, true, false), 1990 INST( F, HF, F, false, true), 1991 INST( F, HF, F, true, false), 1992 1993 #undef INST 1994 }; 1995 1996 if (devinfo.ver < 8 || devinfo.ver >= 11) 1997 return; 1998 1999 brw_set_default_access_mode(p, BRW_ALIGN_16); 2000 2001 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2002 brw_ADD(p, retype(g0, inst[i].dst_type), 2003 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type), 2004 retype(g0, inst[i].src1_type)); 2005 2006 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 2007 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); 2008 2009 EXPECT_EQ(inst[i].expected_result, validate(p)); 2010 2011 clear_instructions(p); 2012 } 2013 } 2014 TEST_P(validation_test,mixed_float_align16_math_packed_format)2015 TEST_P(validation_test, mixed_float_align16_math_packed_format) 2016 { 2017 static const struct { 2018 enum brw_reg_type dst_type; 2019 enum brw_reg_type src0_type; 2020 enum brw_reg_type src1_type; 2021 unsigned src0_vstride; 2022 unsigned src1_vstride; 2023 bool expected_result; 2024 } inst[] = { 2025 #define INST(dst_type, src0_type, src1_type, \ 2026 src0_vstride, src1_vstride, expected_result) \ 2027 { \ 2028 BRW_TYPE_##dst_type, \ 2029 BRW_TYPE_##src0_type, \ 2030 BRW_TYPE_##src1_type, \ 2031 BRW_VERTICAL_STRIDE_##src0_vstride, \ 2032 BRW_VERTICAL_STRIDE_##src1_vstride, \ 2033 expected_result, \ 2034 } 2035 2036 /* We only test with F destination because there is a restriction 2037 * by which F->HF conversions need to be DWord aligned but Align16 also 2038 * requires that destination horizontal stride is 1. 2039 */ 2040 INST( F, HF, F, 4, 0, false), 2041 INST( F, HF, HF, 4, 4, true), 2042 INST( F, F, HF, 4, 0, false), 2043 INST( F, F, HF, 2, 4, false), 2044 INST( F, F, HF, 4, 2, false), 2045 INST( F, HF, HF, 0, 4, false), 2046 2047 #undef INST 2048 }; 2049 2050 /* Align16 Math for mixed float mode is not supported in gfx8 */ 2051 if (devinfo.ver < 9 || devinfo.ver >= 11) 2052 return; 2053 2054 brw_set_default_access_mode(p, BRW_ALIGN_16); 2055 2056 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2057 gfx6_math(p, retype(g0, inst[i].dst_type), 2058 BRW_MATH_FUNCTION_POW, 2059 retype(g0, inst[i].src0_type), 2060 retype(g0, inst[i].src1_type)); 2061 2062 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride); 2063 brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride); 2064 2065 EXPECT_EQ(inst[i].expected_result, validate(p)); 2066 2067 clear_instructions(p); 2068 } 2069 } 2070 TEST_P(validation_test,vector_immediate_destination_alignment)2071 TEST_P(validation_test, vector_immediate_destination_alignment) 2072 { 2073 static const struct { 2074 enum brw_reg_type dst_type; 2075 enum brw_reg_type src_type; 2076 unsigned subnr; 2077 unsigned exec_size; 2078 bool expected_result; 2079 } move[] = { 2080 { BRW_TYPE_F, BRW_TYPE_VF, 0, BRW_EXECUTE_4, true }, 2081 { BRW_TYPE_F, BRW_TYPE_VF, 16, BRW_EXECUTE_4, true }, 2082 { BRW_TYPE_F, BRW_TYPE_VF, 1, BRW_EXECUTE_4, false }, 2083 2084 { BRW_TYPE_W, BRW_TYPE_V, 0, BRW_EXECUTE_8, true }, 2085 { BRW_TYPE_W, BRW_TYPE_V, 16, BRW_EXECUTE_8, true }, 2086 { BRW_TYPE_W, BRW_TYPE_V, 1, BRW_EXECUTE_8, false }, 2087 2088 { BRW_TYPE_W, BRW_TYPE_UV, 0, BRW_EXECUTE_8, true }, 2089 { BRW_TYPE_W, BRW_TYPE_UV, 16, BRW_EXECUTE_8, true }, 2090 { BRW_TYPE_W, BRW_TYPE_UV, 1, BRW_EXECUTE_8, false }, 2091 }; 2092 2093 for (unsigned i = 0; i < ARRAY_SIZE(move); i++) { 2094 /* UV type is Gfx6+ */ 2095 if (devinfo.ver < 6 && 2096 move[i].src_type == BRW_TYPE_UV) 2097 continue; 2098 2099 brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type)); 2100 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, move[i].subnr); 2101 brw_inst_set_exec_size(&devinfo, last_inst, move[i].exec_size); 2102 2103 EXPECT_EQ(move[i].expected_result, validate(p)); 2104 2105 clear_instructions(p); 2106 } 2107 } 2108 TEST_P(validation_test,vector_immediate_destination_stride)2109 TEST_P(validation_test, vector_immediate_destination_stride) 2110 { 2111 static const struct { 2112 enum brw_reg_type dst_type; 2113 enum brw_reg_type src_type; 2114 unsigned stride; 2115 bool expected_result; 2116 } move[] = { 2117 { BRW_TYPE_F, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true }, 2118 { BRW_TYPE_F, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false }, 2119 { BRW_TYPE_D, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true }, 2120 { BRW_TYPE_D, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false }, 2121 { BRW_TYPE_W, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, true }, 2122 { BRW_TYPE_B, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_4, true }, 2123 2124 { BRW_TYPE_W, BRW_TYPE_V, BRW_HORIZONTAL_STRIDE_1, true }, 2125 { BRW_TYPE_W, BRW_TYPE_V, BRW_HORIZONTAL_STRIDE_2, false }, 2126 { BRW_TYPE_W, BRW_TYPE_V, BRW_HORIZONTAL_STRIDE_4, false }, 2127 { BRW_TYPE_B, BRW_TYPE_V, BRW_HORIZONTAL_STRIDE_2, true }, 2128 2129 { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_1, true }, 2130 { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, false }, 2131 { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_4, false }, 2132 { BRW_TYPE_B, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, true }, 2133 }; 2134 2135 for (unsigned i = 0; i < ARRAY_SIZE(move); i++) { 2136 /* UV type is Gfx6+ */ 2137 if (devinfo.ver < 6 && 2138 move[i].src_type == BRW_TYPE_UV) 2139 continue; 2140 2141 brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type)); 2142 brw_inst_set_dst_hstride(&devinfo, last_inst, move[i].stride); 2143 2144 EXPECT_EQ(move[i].expected_result, validate(p)); 2145 2146 clear_instructions(p); 2147 } 2148 } 2149 TEST_P(validation_test,qword_low_power_align1_regioning_restrictions)2150 TEST_P(validation_test, qword_low_power_align1_regioning_restrictions) 2151 { 2152 static const struct { 2153 enum opcode opcode; 2154 unsigned exec_size; 2155 2156 enum brw_reg_type dst_type; 2157 unsigned dst_subreg; 2158 unsigned dst_stride; 2159 2160 enum brw_reg_type src_type; 2161 unsigned src_subreg; 2162 unsigned src_vstride; 2163 unsigned src_width; 2164 unsigned src_hstride; 2165 2166 bool expected_result; 2167 } inst[] = { 2168 #define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type, \ 2169 src_subreg, src_vstride, src_width, src_hstride, expected_result) \ 2170 { \ 2171 BRW_OPCODE_##opcode, \ 2172 BRW_EXECUTE_##exec_size, \ 2173 BRW_TYPE_##dst_type, \ 2174 dst_subreg, \ 2175 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 2176 BRW_TYPE_##src_type, \ 2177 src_subreg, \ 2178 BRW_VERTICAL_STRIDE_##src_vstride, \ 2179 BRW_WIDTH_##src_width, \ 2180 BRW_HORIZONTAL_STRIDE_##src_hstride, \ 2181 expected_result, \ 2182 } 2183 2184 /* Some instruction that violate no restrictions, as a control */ 2185 INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ), 2186 INST(MOV, 4, Q, 0, 1, Q, 0, 4, 4, 1, true ), 2187 INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ), 2188 2189 INST(MOV, 4, DF, 0, 1, F, 0, 8, 4, 2, true ), 2190 INST(MOV, 4, Q, 0, 1, D, 0, 8, 4, 2, true ), 2191 INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ), 2192 2193 INST(MOV, 4, F, 0, 2, DF, 0, 4, 4, 1, true ), 2194 INST(MOV, 4, D, 0, 2, Q, 0, 4, 4, 1, true ), 2195 INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ), 2196 2197 INST(MUL, 8, D, 0, 2, D, 0, 8, 4, 2, true ), 2198 INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ), 2199 2200 /* Something with subreg nrs */ 2201 INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ), 2202 INST(MOV, 2, Q, 8, 1, Q, 8, 2, 2, 1, true ), 2203 INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ), 2204 2205 INST(MUL, 2, D, 4, 2, D, 4, 4, 2, 2, true ), 2206 INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ), 2207 2208 /* The PRMs say that for CHV, BXT: 2209 * 2210 * When source or destination datatype is 64b or operation is integer 2211 * DWord multiply, regioning in Align1 must follow these rules: 2212 * 2213 * 1. Source and Destination horizontal stride must be aligned to the 2214 * same qword. 2215 */ 2216 INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false), 2217 INST(MOV, 4, Q, 0, 2, Q, 0, 4, 4, 1, false), 2218 INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false), 2219 2220 INST(MOV, 4, DF, 0, 2, F, 0, 8, 4, 2, false), 2221 INST(MOV, 4, Q, 0, 2, D, 0, 8, 4, 2, false), 2222 INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false), 2223 2224 INST(MOV, 4, DF, 0, 2, F, 0, 4, 4, 1, false), 2225 INST(MOV, 4, Q, 0, 2, D, 0, 4, 4, 1, false), 2226 INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false), 2227 2228 INST(MUL, 4, D, 0, 2, D, 0, 4, 4, 1, false), 2229 INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false), 2230 2231 INST(MUL, 4, D, 0, 1, D, 0, 8, 4, 2, false), 2232 INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false), 2233 2234 /* 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */ 2235 INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false), 2236 INST(MOV, 4, Q, 0, 1, Q, 0, 0, 2, 1, false), 2237 INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false), 2238 2239 INST(MOV, 4, DF, 0, 1, F, 0, 0, 2, 2, false), 2240 INST(MOV, 4, Q, 0, 1, D, 0, 0, 2, 2, false), 2241 INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false), 2242 2243 INST(MOV, 8, F, 0, 2, DF, 0, 0, 2, 1, false), 2244 INST(MOV, 8, D, 0, 2, Q, 0, 0, 2, 1, false), 2245 INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false), 2246 2247 INST(MUL, 8, D, 0, 2, D, 0, 0, 4, 2, false), 2248 INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false), 2249 2250 INST(MUL, 8, D, 0, 2, D, 0, 0, 4, 2, false), 2251 INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false), 2252 2253 /* 3. Source and Destination offset must be the same, except the case 2254 * of scalar source. 2255 */ 2256 INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false), 2257 INST(MOV, 2, Q, 8, 1, Q, 0, 2, 2, 1, false), 2258 INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false), 2259 2260 INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false), 2261 INST(MOV, 2, Q, 0, 1, Q, 8, 2, 2, 1, false), 2262 INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false), 2263 2264 INST(MUL, 4, D, 4, 2, D, 0, 4, 2, 2, false), 2265 INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false), 2266 2267 INST(MUL, 4, D, 0, 2, D, 4, 4, 2, 2, false), 2268 INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false), 2269 2270 INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ), 2271 INST(MOV, 2, Q, 8, 1, Q, 0, 0, 1, 0, true ), 2272 INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ), 2273 2274 INST(MOV, 2, DF, 8, 1, F, 4, 0, 1, 0, true ), 2275 INST(MOV, 2, Q, 8, 1, D, 4, 0, 1, 0, true ), 2276 INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ), 2277 2278 INST(MUL, 4, D, 4, 1, D, 0, 0, 1, 0, true ), 2279 INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ), 2280 2281 INST(MUL, 4, D, 0, 1, D, 4, 0, 1, 0, true ), 2282 INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ), 2283 2284 #undef INST 2285 }; 2286 2287 /* These restrictions only apply to Gfx8+ */ 2288 if (devinfo.ver < 8) 2289 return; 2290 2291 /* NoDDChk/NoDDClr does not exist on Gfx12+ */ 2292 if (devinfo.ver >= 12) 2293 return; 2294 2295 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2296 if (!devinfo.has_64bit_float && 2297 (inst[i].dst_type == BRW_TYPE_DF || 2298 inst[i].src_type == BRW_TYPE_DF)) 2299 continue; 2300 2301 if (!devinfo.has_64bit_int && 2302 (inst[i].dst_type == BRW_TYPE_Q || 2303 inst[i].dst_type == BRW_TYPE_UQ || 2304 inst[i].src_type == BRW_TYPE_Q || 2305 inst[i].src_type == BRW_TYPE_UQ)) 2306 continue; 2307 2308 if (inst[i].opcode == BRW_OPCODE_MOV) { 2309 brw_MOV(p, retype(g0, inst[i].dst_type), 2310 retype(g0, inst[i].src_type)); 2311 } else { 2312 assert(inst[i].opcode == BRW_OPCODE_MUL); 2313 brw_MUL(p, retype(g0, inst[i].dst_type), 2314 retype(g0, inst[i].src_type), 2315 retype(zero, inst[i].src_type)); 2316 } 2317 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 2318 2319 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg); 2320 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg); 2321 2322 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 2323 2324 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride); 2325 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width); 2326 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride); 2327 2328 if (devinfo.platform == INTEL_PLATFORM_CHV || 2329 intel_device_info_is_9lp(&devinfo)) { 2330 EXPECT_EQ(inst[i].expected_result, validate(p)); 2331 } else { 2332 EXPECT_TRUE(validate(p)); 2333 } 2334 2335 clear_instructions(p); 2336 } 2337 } 2338 TEST_P(validation_test,qword_low_power_no_indirect_addressing)2339 TEST_P(validation_test, qword_low_power_no_indirect_addressing) 2340 { 2341 static const struct { 2342 enum opcode opcode; 2343 unsigned exec_size; 2344 2345 enum brw_reg_type dst_type; 2346 bool dst_is_indirect; 2347 unsigned dst_stride; 2348 2349 enum brw_reg_type src_type; 2350 bool src_is_indirect; 2351 unsigned src_vstride; 2352 unsigned src_width; 2353 unsigned src_hstride; 2354 2355 bool expected_result; 2356 } inst[] = { 2357 #define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride, \ 2358 src_type, src_is_indirect, src_vstride, src_width, src_hstride, \ 2359 expected_result) \ 2360 { \ 2361 BRW_OPCODE_##opcode, \ 2362 BRW_EXECUTE_##exec_size, \ 2363 BRW_TYPE_##dst_type, \ 2364 dst_is_indirect, \ 2365 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 2366 BRW_TYPE_##src_type, \ 2367 src_is_indirect, \ 2368 BRW_VERTICAL_STRIDE_##src_vstride, \ 2369 BRW_WIDTH_##src_width, \ 2370 BRW_HORIZONTAL_STRIDE_##src_hstride, \ 2371 expected_result, \ 2372 } 2373 2374 /* Some instruction that violate no restrictions, as a control */ 2375 INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ), 2376 INST(MOV, 4, Q, 0, 1, Q, 0, 4, 4, 1, true ), 2377 INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ), 2378 2379 INST(MUL, 8, D, 0, 2, D, 0, 8, 4, 2, true ), 2380 INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ), 2381 2382 INST(MOV, 4, F, 1, 1, F, 0, 4, 4, 1, true ), 2383 INST(MOV, 4, F, 0, 1, F, 1, 4, 4, 1, true ), 2384 INST(MOV, 4, F, 1, 1, F, 1, 4, 4, 1, true ), 2385 2386 /* The PRMs say that for CHV, BXT: 2387 * 2388 * When source or destination datatype is 64b or operation is integer 2389 * DWord multiply, indirect addressing must not be used. 2390 */ 2391 INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false), 2392 INST(MOV, 4, Q, 1, 1, Q, 0, 4, 4, 1, false), 2393 INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false), 2394 2395 INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false), 2396 INST(MOV, 4, Q, 0, 1, Q, 1, 4, 4, 1, false), 2397 INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false), 2398 2399 INST(MOV, 4, DF, 1, 1, F, 0, 8, 4, 2, false), 2400 INST(MOV, 4, Q, 1, 1, D, 0, 8, 4, 2, false), 2401 INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false), 2402 2403 INST(MOV, 4, DF, 0, 1, F, 1, 8, 4, 2, false), 2404 INST(MOV, 4, Q, 0, 1, D, 1, 8, 4, 2, false), 2405 INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false), 2406 2407 INST(MOV, 4, F, 1, 2, DF, 0, 4, 4, 1, false), 2408 INST(MOV, 4, D, 1, 2, Q, 0, 4, 4, 1, false), 2409 INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false), 2410 2411 INST(MOV, 4, F, 0, 2, DF, 1, 4, 4, 1, false), 2412 INST(MOV, 4, D, 0, 2, Q, 1, 4, 4, 1, false), 2413 INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false), 2414 2415 INST(MUL, 8, D, 1, 2, D, 0, 8, 4, 2, false), 2416 INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false), 2417 2418 INST(MUL, 8, D, 0, 2, D, 1, 8, 4, 2, false), 2419 INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false), 2420 2421 #undef INST 2422 }; 2423 2424 /* These restrictions only apply to Gfx8+ */ 2425 if (devinfo.ver < 8) 2426 return; 2427 2428 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2429 if (!devinfo.has_64bit_float && 2430 (inst[i].dst_type == BRW_TYPE_DF || 2431 inst[i].src_type == BRW_TYPE_DF)) 2432 continue; 2433 2434 if (!devinfo.has_64bit_int && 2435 (inst[i].dst_type == BRW_TYPE_Q || 2436 inst[i].dst_type == BRW_TYPE_UQ || 2437 inst[i].src_type == BRW_TYPE_Q || 2438 inst[i].src_type == BRW_TYPE_UQ)) 2439 continue; 2440 2441 if (inst[i].opcode == BRW_OPCODE_MOV) { 2442 brw_MOV(p, retype(g0, inst[i].dst_type), 2443 retype(g0, inst[i].src_type)); 2444 } else { 2445 assert(inst[i].opcode == BRW_OPCODE_MUL); 2446 brw_MUL(p, retype(g0, inst[i].dst_type), 2447 retype(g0, inst[i].src_type), 2448 retype(zero, inst[i].src_type)); 2449 } 2450 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 2451 2452 brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect); 2453 brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect); 2454 2455 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 2456 2457 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride); 2458 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width); 2459 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride); 2460 2461 if (devinfo.platform == INTEL_PLATFORM_CHV || 2462 intel_device_info_is_9lp(&devinfo)) { 2463 EXPECT_EQ(inst[i].expected_result, validate(p)); 2464 } else { 2465 EXPECT_TRUE(validate(p)); 2466 } 2467 2468 clear_instructions(p); 2469 } 2470 } 2471 TEST_P(validation_test,qword_low_power_no_64bit_arf)2472 TEST_P(validation_test, qword_low_power_no_64bit_arf) 2473 { 2474 static const struct { 2475 enum opcode opcode; 2476 unsigned exec_size; 2477 2478 struct brw_reg dst; 2479 enum brw_reg_type dst_type; 2480 unsigned dst_stride; 2481 2482 struct brw_reg src; 2483 enum brw_reg_type src_type; 2484 unsigned src_vstride; 2485 unsigned src_width; 2486 unsigned src_hstride; 2487 2488 bool acc_wr; 2489 bool expected_result; 2490 } inst[] = { 2491 #define INST(opcode, exec_size, dst, dst_type, dst_stride, \ 2492 src, src_type, src_vstride, src_width, src_hstride, \ 2493 acc_wr, expected_result) \ 2494 { \ 2495 BRW_OPCODE_##opcode, \ 2496 BRW_EXECUTE_##exec_size, \ 2497 dst, \ 2498 BRW_TYPE_##dst_type, \ 2499 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 2500 src, \ 2501 BRW_TYPE_##src_type, \ 2502 BRW_VERTICAL_STRIDE_##src_vstride, \ 2503 BRW_WIDTH_##src_width, \ 2504 BRW_HORIZONTAL_STRIDE_##src_hstride, \ 2505 acc_wr, \ 2506 expected_result, \ 2507 } 2508 2509 /* Some instruction that violate no restrictions, as a control */ 2510 INST(MOV, 4, g0, DF, 1, g0, F, 4, 2, 2, 0, true ), 2511 INST(MOV, 4, g0, F, 2, g0, DF, 4, 4, 1, 0, true ), 2512 2513 INST(MOV, 4, g0, Q, 1, g0, D, 4, 2, 2, 0, true ), 2514 INST(MOV, 4, g0, D, 2, g0, Q, 4, 4, 1, 0, true ), 2515 2516 INST(MOV, 4, g0, UQ, 1, g0, UD, 4, 2, 2, 0, true ), 2517 INST(MOV, 4, g0, UD, 2, g0, UQ, 4, 4, 1, 0, true ), 2518 2519 INST(MOV, 4, null, F, 1, g0, F, 4, 4, 1, 0, true ), 2520 INST(MOV, 4, acc0, F, 1, g0, F, 4, 4, 1, 0, true ), 2521 INST(MOV, 4, g0, F, 1, acc0, F, 4, 4, 1, 0, true ), 2522 2523 INST(MOV, 4, null, D, 1, g0, D, 4, 4, 1, 0, true ), 2524 INST(MOV, 4, acc0, D, 1, g0, D, 4, 4, 1, 0, true ), 2525 INST(MOV, 4, g0, D, 1, acc0, D, 4, 4, 1, 0, true ), 2526 2527 INST(MOV, 4, null, UD, 1, g0, UD, 4, 4, 1, 0, true ), 2528 INST(MOV, 4, acc0, UD, 1, g0, UD, 4, 4, 1, 0, true ), 2529 INST(MOV, 4, g0, UD, 1, acc0, UD, 4, 4, 1, 0, true ), 2530 2531 INST(MUL, 4, g0, D, 2, g0, D, 4, 2, 2, 0, true ), 2532 INST(MUL, 4, g0, UD, 2, g0, UD, 4, 2, 2, 0, true ), 2533 2534 /* The PRMs say that for CHV, BXT: 2535 * 2536 * ARF registers must never be used with 64b datatype or when 2537 * operation is integer DWord multiply. 2538 */ 2539 INST(MOV, 4, acc0, DF, 1, g0, F, 4, 2, 2, 0, false), 2540 INST(MOV, 4, g0, DF, 1, acc0, F, 4, 2, 2, 0, false), 2541 2542 INST(MOV, 4, acc0, Q, 1, g0, D, 4, 2, 2, 0, false), 2543 INST(MOV, 4, g0, Q, 1, acc0, D, 4, 2, 2, 0, false), 2544 2545 INST(MOV, 4, acc0, UQ, 1, g0, UD, 4, 2, 2, 0, false), 2546 INST(MOV, 4, g0, UQ, 1, acc0, UD, 4, 2, 2, 0, false), 2547 2548 INST(MOV, 4, acc0, F, 2, g0, DF, 4, 4, 1, 0, false), 2549 INST(MOV, 4, g0, F, 2, acc0, DF, 4, 4, 1, 0, false), 2550 2551 INST(MOV, 4, acc0, D, 2, g0, Q, 4, 4, 1, 0, false), 2552 INST(MOV, 4, g0, D, 2, acc0, Q, 4, 4, 1, 0, false), 2553 2554 INST(MOV, 4, acc0, UD, 2, g0, UQ, 4, 4, 1, 0, false), 2555 INST(MOV, 4, g0, UD, 2, acc0, UQ, 4, 4, 1, 0, false), 2556 2557 INST(MUL, 4, acc0, D, 2, g0, D, 4, 2, 2, 0, false), 2558 INST(MUL, 4, acc0, UD, 2, g0, UD, 4, 2, 2, 0, false), 2559 /* MUL cannot have integer accumulator sources, so don't test that */ 2560 2561 /* We assume that the restriction does not apply to the null register */ 2562 INST(MOV, 4, null, DF, 1, g0, F, 4, 2, 2, 0, true ), 2563 INST(MOV, 4, null, Q, 1, g0, D, 4, 2, 2, 0, true ), 2564 INST(MOV, 4, null, UQ, 1, g0, UD, 4, 2, 2, 0, true ), 2565 2566 /* Check implicit accumulator write control */ 2567 INST(MOV, 4, null, DF, 1, g0, F, 4, 2, 2, 1, false), 2568 INST(MUL, 4, null, DF, 1, g0, F, 4, 2, 2, 1, false), 2569 2570 #undef INST 2571 }; 2572 2573 /* These restrictions only apply to Gfx8+ */ 2574 if (devinfo.ver < 8) 2575 return; 2576 2577 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2578 if (!devinfo.has_64bit_float && 2579 (inst[i].dst_type == BRW_TYPE_DF || 2580 inst[i].src_type == BRW_TYPE_DF)) 2581 continue; 2582 2583 if (!devinfo.has_64bit_int && 2584 (inst[i].dst_type == BRW_TYPE_Q || 2585 inst[i].dst_type == BRW_TYPE_UQ || 2586 inst[i].src_type == BRW_TYPE_Q || 2587 inst[i].src_type == BRW_TYPE_UQ)) 2588 continue; 2589 2590 if (inst[i].opcode == BRW_OPCODE_MOV) { 2591 brw_MOV(p, retype(inst[i].dst, inst[i].dst_type), 2592 retype(inst[i].src, inst[i].src_type)); 2593 } else { 2594 assert(inst[i].opcode == BRW_OPCODE_MUL); 2595 brw_MUL(p, retype(inst[i].dst, inst[i].dst_type), 2596 retype(inst[i].src, inst[i].src_type), 2597 retype(zero, inst[i].src_type)); 2598 brw_inst_set_opcode(&isa, last_inst, inst[i].opcode); 2599 } 2600 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 2601 brw_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr); 2602 2603 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 2604 2605 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride); 2606 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width); 2607 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride); 2608 2609 /* Note: The Broadwell PRM also lists the restriction that destination 2610 * of DWord multiplication cannot be the accumulator. 2611 */ 2612 if (devinfo.platform == INTEL_PLATFORM_CHV || 2613 intel_device_info_is_9lp(&devinfo) || 2614 (devinfo.ver == 8 && 2615 inst[i].opcode == BRW_OPCODE_MUL && 2616 brw_inst_dst_reg_file(&devinfo, last_inst) == ARF && 2617 brw_inst_dst_da_reg_nr(&devinfo, last_inst) != BRW_ARF_NULL)) { 2618 EXPECT_EQ(inst[i].expected_result, validate(p)); 2619 } else { 2620 EXPECT_TRUE(validate(p)); 2621 } 2622 2623 clear_instructions(p); 2624 } 2625 2626 if (!devinfo.has_64bit_float) 2627 return; 2628 2629 /* MAC implicitly reads the accumulator */ 2630 brw_MAC(p, retype(g0, BRW_TYPE_DF), 2631 retype(stride(g0, 4, 4, 1), BRW_TYPE_DF), 2632 retype(stride(g0, 4, 4, 1), BRW_TYPE_DF)); 2633 if (devinfo.platform == INTEL_PLATFORM_CHV || 2634 intel_device_info_is_9lp(&devinfo)) { 2635 EXPECT_FALSE(validate(p)); 2636 } else { 2637 EXPECT_TRUE(validate(p)); 2638 } 2639 } 2640 TEST_P(validation_test,align16_64_bit_integer)2641 TEST_P(validation_test, align16_64_bit_integer) 2642 { 2643 static const struct { 2644 enum opcode opcode; 2645 unsigned exec_size; 2646 2647 enum brw_reg_type dst_type; 2648 enum brw_reg_type src_type; 2649 2650 bool expected_result; 2651 } inst[] = { 2652 #define INST(opcode, exec_size, dst_type, src_type, expected_result) \ 2653 { \ 2654 BRW_OPCODE_##opcode, \ 2655 BRW_EXECUTE_##exec_size, \ 2656 BRW_TYPE_##dst_type, \ 2657 BRW_TYPE_##src_type, \ 2658 expected_result, \ 2659 } 2660 2661 /* Some instruction that violate no restrictions, as a control */ 2662 INST(MOV, 2, Q, D, true ), 2663 INST(MOV, 2, UQ, UD, true ), 2664 INST(MOV, 2, DF, F, true ), 2665 2666 INST(ADD, 2, Q, D, true ), 2667 INST(ADD, 2, UQ, UD, true ), 2668 INST(ADD, 2, DF, F, true ), 2669 2670 /* The PRMs say that for BDW, SKL: 2671 * 2672 * If Align16 is required for an operation with QW destination and non-QW 2673 * source datatypes, the execution size cannot exceed 2. 2674 */ 2675 2676 INST(MOV, 4, Q, D, false), 2677 INST(MOV, 4, UQ, UD, false), 2678 INST(MOV, 4, DF, F, false), 2679 2680 INST(ADD, 4, Q, D, false), 2681 INST(ADD, 4, UQ, UD, false), 2682 INST(ADD, 4, DF, F, false), 2683 2684 #undef INST 2685 }; 2686 2687 /* 64-bit integer types exist on Gfx8+ */ 2688 if (devinfo.ver < 8) 2689 return; 2690 2691 /* Align16 does not exist on Gfx11+ */ 2692 if (devinfo.ver >= 11) 2693 return; 2694 2695 brw_set_default_access_mode(p, BRW_ALIGN_16); 2696 2697 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2698 if (inst[i].opcode == BRW_OPCODE_MOV) { 2699 brw_MOV(p, retype(g0, inst[i].dst_type), 2700 retype(g0, inst[i].src_type)); 2701 } else { 2702 assert(inst[i].opcode == BRW_OPCODE_ADD); 2703 brw_ADD(p, retype(g0, inst[i].dst_type), 2704 retype(g0, inst[i].src_type), 2705 retype(g0, inst[i].src_type)); 2706 } 2707 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 2708 2709 EXPECT_EQ(inst[i].expected_result, validate(p)); 2710 2711 clear_instructions(p); 2712 } 2713 } 2714 TEST_P(validation_test,qword_low_power_no_depctrl)2715 TEST_P(validation_test, qword_low_power_no_depctrl) 2716 { 2717 static const struct { 2718 enum opcode opcode; 2719 unsigned exec_size; 2720 2721 enum brw_reg_type dst_type; 2722 unsigned dst_stride; 2723 2724 enum brw_reg_type src_type; 2725 unsigned src_vstride; 2726 unsigned src_width; 2727 unsigned src_hstride; 2728 2729 bool no_dd_check; 2730 bool no_dd_clear; 2731 2732 bool expected_result; 2733 } inst[] = { 2734 #define INST(opcode, exec_size, dst_type, dst_stride, \ 2735 src_type, src_vstride, src_width, src_hstride, \ 2736 no_dd_check, no_dd_clear, expected_result) \ 2737 { \ 2738 BRW_OPCODE_##opcode, \ 2739 BRW_EXECUTE_##exec_size, \ 2740 BRW_TYPE_##dst_type, \ 2741 BRW_HORIZONTAL_STRIDE_##dst_stride, \ 2742 BRW_TYPE_##src_type, \ 2743 BRW_VERTICAL_STRIDE_##src_vstride, \ 2744 BRW_WIDTH_##src_width, \ 2745 BRW_HORIZONTAL_STRIDE_##src_hstride, \ 2746 no_dd_check, \ 2747 no_dd_clear, \ 2748 expected_result, \ 2749 } 2750 2751 /* Some instruction that violate no restrictions, as a control */ 2752 INST(MOV, 4, DF, 1, F, 8, 4, 2, 0, 0, true ), 2753 INST(MOV, 4, Q, 1, D, 8, 4, 2, 0, 0, true ), 2754 INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ), 2755 2756 INST(MOV, 4, F, 2, DF, 4, 4, 1, 0, 0, true ), 2757 INST(MOV, 4, D, 2, Q, 4, 4, 1, 0, 0, true ), 2758 INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ), 2759 2760 INST(MUL, 8, D, 2, D, 8, 4, 2, 0, 0, true ), 2761 INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ), 2762 2763 INST(MOV, 4, F, 1, F, 4, 4, 1, 1, 1, true ), 2764 2765 /* The PRMs say that for CHV, BXT: 2766 * 2767 * When source or destination datatype is 64b or operation is integer 2768 * DWord multiply, DepCtrl must not be used. 2769 */ 2770 INST(MOV, 4, DF, 1, F, 8, 4, 2, 1, 0, false), 2771 INST(MOV, 4, Q, 1, D, 8, 4, 2, 1, 0, false), 2772 INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false), 2773 2774 INST(MOV, 4, F, 2, DF, 4, 4, 1, 1, 0, false), 2775 INST(MOV, 4, D, 2, Q, 4, 4, 1, 1, 0, false), 2776 INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false), 2777 2778 INST(MOV, 4, DF, 1, F, 8, 4, 2, 0, 1, false), 2779 INST(MOV, 4, Q, 1, D, 8, 4, 2, 0, 1, false), 2780 INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false), 2781 2782 INST(MOV, 4, F, 2, DF, 4, 4, 1, 0, 1, false), 2783 INST(MOV, 4, D, 2, Q, 4, 4, 1, 0, 1, false), 2784 INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false), 2785 2786 INST(MUL, 8, D, 2, D, 8, 4, 2, 1, 0, false), 2787 INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false), 2788 2789 INST(MUL, 8, D, 2, D, 8, 4, 2, 0, 1, false), 2790 INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false), 2791 2792 #undef INST 2793 }; 2794 2795 /* These restrictions only apply to Gfx8+ */ 2796 if (devinfo.ver < 8) 2797 return; 2798 2799 /* NoDDChk/NoDDClr does not exist on Gfx12+ */ 2800 if (devinfo.ver >= 12) 2801 return; 2802 2803 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2804 if (!devinfo.has_64bit_float && 2805 (inst[i].dst_type == BRW_TYPE_DF || 2806 inst[i].src_type == BRW_TYPE_DF)) 2807 continue; 2808 2809 if (!devinfo.has_64bit_int && 2810 (inst[i].dst_type == BRW_TYPE_Q || 2811 inst[i].dst_type == BRW_TYPE_UQ || 2812 inst[i].src_type == BRW_TYPE_Q || 2813 inst[i].src_type == BRW_TYPE_UQ)) 2814 continue; 2815 2816 if (inst[i].opcode == BRW_OPCODE_MOV) { 2817 brw_MOV(p, retype(g0, inst[i].dst_type), 2818 retype(g0, inst[i].src_type)); 2819 } else { 2820 assert(inst[i].opcode == BRW_OPCODE_MUL); 2821 brw_MUL(p, retype(g0, inst[i].dst_type), 2822 retype(g0, inst[i].src_type), 2823 retype(zero, inst[i].src_type)); 2824 } 2825 brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); 2826 2827 brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); 2828 2829 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride); 2830 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width); 2831 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride); 2832 2833 brw_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check); 2834 brw_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear); 2835 2836 if (devinfo.platform == INTEL_PLATFORM_CHV || 2837 intel_device_info_is_9lp(&devinfo)) { 2838 EXPECT_EQ(inst[i].expected_result, validate(p)); 2839 } else { 2840 EXPECT_TRUE(validate(p)); 2841 } 2842 2843 clear_instructions(p); 2844 } 2845 } 2846 TEST_P(validation_test,gfx11_no_byte_src_1_2)2847 TEST_P(validation_test, gfx11_no_byte_src_1_2) 2848 { 2849 static const struct { 2850 enum opcode opcode; 2851 unsigned access_mode; 2852 2853 enum brw_reg_type dst_type; 2854 struct { 2855 enum brw_reg_type type; 2856 unsigned vstride; 2857 unsigned width; 2858 unsigned hstride; 2859 } srcs[3]; 2860 2861 int gfx_ver; 2862 bool expected_result; 2863 } inst[] = { 2864 #define INST(opcode, access_mode, dst_type, \ 2865 src0_type, src0_vstride, src0_width, src0_hstride, \ 2866 src1_type, src1_vstride, src1_width, src1_hstride, \ 2867 src2_type, \ 2868 gfx_ver, expected_result) \ 2869 { \ 2870 BRW_OPCODE_##opcode, \ 2871 BRW_ALIGN_##access_mode, \ 2872 BRW_TYPE_##dst_type, \ 2873 { \ 2874 { \ 2875 BRW_TYPE_##src0_type, \ 2876 BRW_VERTICAL_STRIDE_##src0_vstride, \ 2877 BRW_WIDTH_##src0_width, \ 2878 BRW_HORIZONTAL_STRIDE_##src0_hstride, \ 2879 }, \ 2880 { \ 2881 BRW_TYPE_##src1_type, \ 2882 BRW_VERTICAL_STRIDE_##src1_vstride, \ 2883 BRW_WIDTH_##src1_width, \ 2884 BRW_HORIZONTAL_STRIDE_##src1_hstride, \ 2885 }, \ 2886 { \ 2887 BRW_TYPE_##src2_type, \ 2888 }, \ 2889 }, \ 2890 gfx_ver, \ 2891 expected_result, \ 2892 } 2893 2894 /* Passes on < 11 */ 2895 INST(MOV, 16, F, B, 2, 4, 0, UD, 0, 4, 0, D, 8, true ), 2896 INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0, D, 7, true ), 2897 INST(MAD, 16, D, B, 0, 4, 0, UB, 0, 1, 0, B, 10, true ), 2898 2899 /* Fails on 11+ */ 2900 INST(MAD, 1, UB, W, 1, 1, 0, D, 0, 4, 0, B, 11, false ), 2901 INST(MAD, 1, UB, W, 1, 1, 1, UB, 1, 1, 0, W, 11, false ), 2902 INST(ADD, 1, W, W, 1, 4, 1, B, 1, 1, 0, D, 11, false ), 2903 2904 /* Passes on 11+ */ 2905 INST(MOV, 1, W, B, 8, 8, 1, D, 8, 8, 1, D, 11, true ), 2906 INST(ADD, 1, UD, B, 8, 8, 1, W, 8, 8, 1, D, 11, true ), 2907 INST(MAD, 1, B, B, 0, 1, 0, D, 0, 4, 0, W, 11, true ), 2908 2909 #undef INST 2910 }; 2911 2912 2913 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 2914 /* Skip instruction not meant for this gfx_ver. */ 2915 if (devinfo.ver != inst[i].gfx_ver) 2916 continue; 2917 2918 brw_push_insn_state(p); 2919 2920 brw_set_default_exec_size(p, BRW_EXECUTE_8); 2921 brw_set_default_access_mode(p, inst[i].access_mode); 2922 2923 switch (inst[i].opcode) { 2924 case BRW_OPCODE_MOV: 2925 brw_MOV(p, retype(g0, inst[i].dst_type), 2926 retype(g0, inst[i].srcs[0].type)); 2927 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); 2928 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); 2929 break; 2930 case BRW_OPCODE_ADD: 2931 brw_ADD(p, retype(g0, inst[i].dst_type), 2932 retype(g0, inst[i].srcs[0].type), 2933 retype(g0, inst[i].srcs[1].type)); 2934 brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); 2935 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); 2936 brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); 2937 brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride); 2938 brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); 2939 brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride); 2940 break; 2941 case BRW_OPCODE_MAD: 2942 brw_MAD(p, retype(g0, inst[i].dst_type), 2943 retype(g0, inst[i].srcs[0].type), 2944 retype(g0, inst[i].srcs[1].type), 2945 retype(g0, inst[i].srcs[2].type)); 2946 brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); 2947 brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); 2948 brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride); 2949 brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride); 2950 break; 2951 default: 2952 unreachable("invalid opcode"); 2953 } 2954 2955 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); 2956 2957 brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width); 2958 brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width); 2959 2960 brw_pop_insn_state(p); 2961 2962 EXPECT_EQ(inst[i].expected_result, validate(p)); 2963 2964 clear_instructions(p); 2965 } 2966 } 2967 TEST_P(validation_test,add3_source_types)2968 TEST_P(validation_test, add3_source_types) 2969 { 2970 static const struct { 2971 enum brw_reg_type dst_type; 2972 enum brw_reg_type src0_type; 2973 enum brw_reg_type src1_type; 2974 enum brw_reg_type src2_type; 2975 bool expected_result; 2976 } inst[] = { 2977 #define INST(dst_type, src0_type, src1_type, src2_type, expected_result) \ 2978 { \ 2979 BRW_TYPE_##dst_type, \ 2980 BRW_TYPE_##src0_type, \ 2981 BRW_TYPE_##src1_type, \ 2982 BRW_TYPE_##src2_type, \ 2983 expected_result, \ 2984 } 2985 2986 INST( F, F, F, F, false), 2987 INST(HF, HF, HF, HF, false), 2988 INST( B, B, B, B, false), 2989 INST(UB, UB, UB, UB, false), 2990 2991 INST( W, W, W, W, true), 2992 INST(UW, UW, UW, UW, true), 2993 INST( D, D, D, D, true), 2994 INST(UD, UD, UD, UD, true), 2995 2996 INST( W, D, W, W, true), 2997 INST(UW, UW, UD, UW, true), 2998 INST( D, D, W, D, true), 2999 INST(UD, UD, UD, UW, true), 3000 #undef INST 3001 }; 3002 3003 3004 if (devinfo.verx10 < 125) 3005 return; 3006 3007 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 3008 brw_ADD3(p, 3009 retype(g0, inst[i].dst_type), 3010 retype(g0, inst[i].src0_type), 3011 retype(g0, inst[i].src1_type), 3012 retype(g0, inst[i].src2_type)); 3013 3014 EXPECT_EQ(inst[i].expected_result, validate(p)); 3015 3016 clear_instructions(p); 3017 } 3018 } 3019 TEST_P(validation_test,add3_immediate_types)3020 TEST_P(validation_test, add3_immediate_types) 3021 { 3022 static const struct { 3023 enum brw_reg_type reg_type; 3024 enum brw_reg_type imm_type; 3025 unsigned imm_src; 3026 bool expected_result; 3027 } inst[] = { 3028 #define INST(reg_type, imm_type, imm_src, expected_result) \ 3029 { \ 3030 BRW_TYPE_##reg_type, \ 3031 BRW_TYPE_##imm_type, \ 3032 imm_src, \ 3033 expected_result, \ 3034 } 3035 3036 INST( W, W, 0, true), 3037 INST( W, W, 2, true), 3038 INST(UW, UW, 0, true), 3039 INST(UW, UW, 2, true), 3040 INST( D, W, 0, true), 3041 INST(UD, W, 2, true), 3042 INST( D, UW, 0, true), 3043 INST(UW, UW, 2, true), 3044 3045 INST( W, D, 0, false), 3046 INST( W, D, 2, false), 3047 INST(UW, UD, 0, false), 3048 INST(UW, UD, 2, false), 3049 INST( D, D, 0, false), 3050 INST(UD, D, 2, false), 3051 INST( D, UD, 0, false), 3052 INST(UW, UD, 2, false), 3053 #undef INST 3054 }; 3055 3056 3057 if (devinfo.verx10 < 125) 3058 return; 3059 3060 for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) { 3061 brw_ADD3(p, 3062 retype(g0, inst[i].reg_type), 3063 inst[i].imm_src == 0 ? retype(brw_imm_d(0x1234), inst[i].imm_type) 3064 : retype(g0, inst[i].reg_type), 3065 retype(g0, inst[i].reg_type), 3066 inst[i].imm_src == 2 ? retype(brw_imm_d(0x2143), inst[i].imm_type) 3067 : retype(g0, inst[i].reg_type)); 3068 3069 EXPECT_EQ(inst[i].expected_result, validate(p)); 3070 3071 clear_instructions(p); 3072 } 3073 } 3074 TEST_P(validation_test,dpas_sdepth)3075 TEST_P(validation_test, dpas_sdepth) 3076 { 3077 if (devinfo.verx10 < 125) 3078 return; 3079 3080 static const enum gfx12_systolic_depth depth[] = { 3081 BRW_SYSTOLIC_DEPTH_16, 3082 BRW_SYSTOLIC_DEPTH_2, 3083 BRW_SYSTOLIC_DEPTH_4, 3084 BRW_SYSTOLIC_DEPTH_8, 3085 }; 3086 3087 for (unsigned i = 0; i < ARRAY_SIZE(depth); i++) { 3088 brw_DPAS(p, 3089 depth[i], 3090 8, 3091 retype(brw_vec8_grf(0, 0), BRW_TYPE_F), 3092 null, 3093 retype(brw_vec8_grf(16, 0), BRW_TYPE_HF), 3094 retype(brw_vec8_grf(32, 0), BRW_TYPE_HF)); 3095 3096 const bool expected_result = depth[i] == BRW_SYSTOLIC_DEPTH_8; 3097 3098 EXPECT_EQ(expected_result, validate(p)) << 3099 "Encoded systolic depth value is: " << depth[i]; 3100 3101 clear_instructions(p); 3102 } 3103 } 3104 TEST_P(validation_test,dpas_exec_size)3105 TEST_P(validation_test, dpas_exec_size) 3106 { 3107 if (devinfo.verx10 < 125) 3108 return; 3109 3110 static const enum brw_execution_size test_vectors[] = { 3111 BRW_EXECUTE_1, 3112 BRW_EXECUTE_2, 3113 BRW_EXECUTE_4, 3114 BRW_EXECUTE_8, 3115 BRW_EXECUTE_16, 3116 BRW_EXECUTE_32, 3117 }; 3118 3119 for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) { 3120 brw_set_default_exec_size(p, test_vectors[i]); 3121 3122 brw_DPAS(p, 3123 BRW_SYSTOLIC_DEPTH_8, 3124 8, 3125 retype(brw_vec8_grf(0, 0), BRW_TYPE_F), 3126 null, 3127 retype(brw_vec8_grf(16, 0), BRW_TYPE_HF), 3128 retype(brw_vec8_grf(32, 0), BRW_TYPE_HF)); 3129 3130 const bool expected_result = test_vectors[i] == BRW_EXECUTE_8; 3131 3132 EXPECT_EQ(expected_result, validate(p)) << 3133 "Exec size = " << (1u << test_vectors[i]); 3134 3135 clear_instructions(p); 3136 } 3137 3138 brw_set_default_exec_size(p, BRW_EXECUTE_8); 3139 } 3140 TEST_P(validation_test,dpas_sub_byte_precision)3141 TEST_P(validation_test, dpas_sub_byte_precision) 3142 { 3143 if (devinfo.verx10 < 125) 3144 return; 3145 3146 static const struct { 3147 brw_reg_type dst_type; 3148 brw_reg_type src0_type; 3149 brw_reg_type src1_type; 3150 enum gfx12_sub_byte_precision src1_prec; 3151 brw_reg_type src2_type; 3152 enum gfx12_sub_byte_precision src2_prec; 3153 bool expected_result; 3154 } test_vectors[] = { 3155 { 3156 BRW_TYPE_F, 3157 BRW_TYPE_F, 3158 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE, 3159 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE, 3160 true, 3161 }, 3162 { 3163 BRW_TYPE_F, 3164 BRW_TYPE_F, 3165 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE, 3166 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_4BIT, 3167 false, 3168 }, 3169 { 3170 BRW_TYPE_F, 3171 BRW_TYPE_F, 3172 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE, 3173 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_2BIT, 3174 false, 3175 }, 3176 { 3177 BRW_TYPE_F, 3178 BRW_TYPE_F, 3179 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_4BIT, 3180 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE, 3181 false, 3182 }, 3183 { 3184 BRW_TYPE_F, 3185 BRW_TYPE_F, 3186 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_2BIT, 3187 BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE, 3188 false, 3189 }, 3190 3191 { 3192 BRW_TYPE_UD, 3193 BRW_TYPE_UD, 3194 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3195 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3196 true, 3197 }, 3198 { 3199 BRW_TYPE_UD, 3200 BRW_TYPE_UD, 3201 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3202 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_4BIT, 3203 true, 3204 }, 3205 { 3206 BRW_TYPE_UD, 3207 BRW_TYPE_UD, 3208 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3209 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_2BIT, 3210 true, 3211 }, 3212 { 3213 BRW_TYPE_UD, 3214 BRW_TYPE_UD, 3215 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3216 BRW_TYPE_UB, (enum gfx12_sub_byte_precision) 3, 3217 false, 3218 }, 3219 { 3220 BRW_TYPE_UD, 3221 BRW_TYPE_UD, 3222 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_4BIT, 3223 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3224 true, 3225 }, 3226 { 3227 BRW_TYPE_UD, 3228 BRW_TYPE_UD, 3229 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_2BIT, 3230 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3231 true, 3232 }, 3233 { 3234 BRW_TYPE_UD, 3235 BRW_TYPE_UD, 3236 BRW_TYPE_UB, (enum gfx12_sub_byte_precision) 3, 3237 BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE, 3238 false, 3239 }, 3240 }; 3241 3242 for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) { 3243 brw_inst *inst = 3244 brw_DPAS(p, 3245 BRW_SYSTOLIC_DEPTH_8, 3246 8, 3247 retype(brw_vec8_grf(0, 0), test_vectors[i].dst_type), 3248 retype(brw_vec8_grf(16, 0), test_vectors[i].src0_type), 3249 retype(brw_vec8_grf(32, 0), test_vectors[i].src1_type), 3250 retype(brw_vec8_grf(48, 0), test_vectors[i].src2_type)); 3251 3252 brw_inst_set_dpas_3src_src1_subbyte(&devinfo, inst, 3253 test_vectors[i].src1_prec); 3254 brw_inst_set_dpas_3src_src2_subbyte(&devinfo, inst, 3255 test_vectors[i].src2_prec); 3256 3257 EXPECT_EQ(test_vectors[i].expected_result, validate(p)) << 3258 "test vector index = " << i; 3259 3260 clear_instructions(p); 3261 } 3262 } 3263 TEST_P(validation_test,dpas_types)3264 TEST_P(validation_test, dpas_types) 3265 { 3266 if (devinfo.verx10 < 125) 3267 return; 3268 3269 #define TV(a, b, c, d, r) \ 3270 { BRW_TYPE_ ## a, BRW_TYPE_ ## b, BRW_TYPE_ ## c, BRW_TYPE_ ## d, r } 3271 3272 static const struct { 3273 brw_reg_type dst_type; 3274 brw_reg_type src0_type; 3275 brw_reg_type src1_type; 3276 brw_reg_type src2_type; 3277 bool expected_result; 3278 } test_vectors[] = { 3279 TV( F, F, HF, HF, true), 3280 TV( F, HF, HF, HF, false), 3281 TV(HF, F, HF, HF, false), 3282 TV( F, F, F, HF, false), 3283 TV( F, F, HF, F, false), 3284 3285 TV(DF, DF, DF, DF, false), 3286 TV(DF, DF, DF, F, false), 3287 TV(DF, DF, F, DF, false), 3288 TV(DF, F, DF, DF, false), 3289 TV(DF, DF, DF, HF, false), 3290 TV(DF, DF, HF, DF, false), 3291 TV(DF, HF, DF, DF, false), 3292 3293 TV(UD, UD, UB, UB, true), 3294 TV(UD, UD, UB, UD, false), 3295 TV(UD, UD, UD, UB, false), 3296 TV(UD, UD, UB, UW, false), 3297 TV(UD, UD, UW, UB, false), 3298 3299 TV(UD, UB, UB, UB, false), 3300 TV(UD, UW, UB, UB, false), 3301 3302 TV(UQ, UQ, UB, UB, false), 3303 TV(UQ, UQ, UB, UQ, false), 3304 TV(UQ, UQ, UQ, UB, false), 3305 TV(UQ, UQ, UB, UW, false), 3306 TV(UQ, UQ, UW, UB, false), 3307 3308 TV( D, D, B, B, true), 3309 TV( D, D, B, UB, true), 3310 TV( D, D, UB, B, true), 3311 TV( D, UD, B, B, true), 3312 3313 TV( D, D, B, D, false), 3314 TV( D, D, D, B, false), 3315 TV( D, D, B, W, false), 3316 TV( D, D, W, B, false), 3317 3318 TV( D, B, B, B, false), 3319 TV( D, W, B, B, false), 3320 3321 TV( Q, Q, B, B, false), 3322 TV( Q, Q, B, Q, false), 3323 TV( Q, Q, Q, B, false), 3324 TV( Q, Q, B, W, false), 3325 TV( Q, Q, W, B, false), 3326 3327 TV(UD, UD, UB, B, false), 3328 TV(UD, UD, B, UB, false), 3329 TV(UD, D, UB, UB, false), 3330 }; 3331 3332 #undef TV 3333 3334 for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) { 3335 brw_DPAS(p, 3336 BRW_SYSTOLIC_DEPTH_8, 3337 8, 3338 retype(brw_vec8_grf(0, 0), test_vectors[i].dst_type), 3339 retype(brw_vec8_grf(16, 0), test_vectors[i].src0_type), 3340 retype(brw_vec8_grf(32, 0), test_vectors[i].src1_type), 3341 retype(brw_vec8_grf(48, 0), test_vectors[i].src2_type)); 3342 3343 EXPECT_EQ(test_vectors[i].expected_result, validate(p)) << 3344 "test vector index = " << i; 3345 3346 clear_instructions(p); 3347 } 3348 } 3349 TEST_P(validation_test,dpas_src_subreg_nr)3350 TEST_P(validation_test, dpas_src_subreg_nr) 3351 { 3352 if (devinfo.verx10 < 125) 3353 return; 3354 3355 #define TV(dt, od, t0, o0, t1, o1, o2, r) \ 3356 { BRW_TYPE_ ## dt, od, BRW_TYPE_ ## t0, o0, BRW_TYPE_ ## t1, o1, o2, r } 3357 3358 static const struct { 3359 brw_reg_type dst_type; 3360 unsigned dst_subnr; 3361 brw_reg_type src0_type; 3362 unsigned src0_subnr; 3363 brw_reg_type src1_src2_type; 3364 unsigned src1_subnr; 3365 unsigned src2_subnr; 3366 bool expected_result; 3367 } test_vectors[] = { 3368 TV( F, 0, F, 0, HF, 0, 0, true), 3369 TV( D, 0, D, 0, B, 0, 0, true), 3370 TV( D, 0, D, 0, UB, 0, 0, true), 3371 TV( D, 0, UD, 0, B, 0, 0, true), 3372 3373 TV( F, 1, F, 0, HF, 0, 0, false), 3374 TV( F, 2, F, 0, HF, 0, 0, false), 3375 TV( F, 3, F, 0, HF, 0, 0, false), 3376 TV( F, 4, F, 0, HF, 0, 0, false), 3377 TV( F, 5, F, 0, HF, 0, 0, false), 3378 TV( F, 6, F, 0, HF, 0, 0, false), 3379 TV( F, 7, F, 0, HF, 0, 0, false), 3380 3381 TV( F, 0, F, 1, HF, 0, 0, false), 3382 TV( F, 0, F, 2, HF, 0, 0, false), 3383 TV( F, 0, F, 3, HF, 0, 0, false), 3384 TV( F, 0, F, 4, HF, 0, 0, false), 3385 TV( F, 0, F, 5, HF, 0, 0, false), 3386 TV( F, 0, F, 6, HF, 0, 0, false), 3387 TV( F, 0, F, 7, HF, 0, 0, false), 3388 3389 TV( F, 0, F, 0, HF, 1, 0, false), 3390 TV( F, 0, F, 0, HF, 2, 0, false), 3391 TV( F, 0, F, 0, HF, 3, 0, false), 3392 TV( F, 0, F, 0, HF, 4, 0, false), 3393 TV( F, 0, F, 0, HF, 5, 0, false), 3394 TV( F, 0, F, 0, HF, 6, 0, false), 3395 TV( F, 0, F, 0, HF, 7, 0, false), 3396 TV( F, 0, F, 0, HF, 8, 0, false), 3397 TV( F, 0, F, 0, HF, 9, 0, false), 3398 TV( F, 0, F, 0, HF, 10, 0, false), 3399 TV( F, 0, F, 0, HF, 11, 0, false), 3400 TV( F, 0, F, 0, HF, 12, 0, false), 3401 TV( F, 0, F, 0, HF, 13, 0, false), 3402 TV( F, 0, F, 0, HF, 14, 0, false), 3403 TV( F, 0, F, 0, HF, 15, 0, false), 3404 3405 TV( F, 0, F, 0, HF, 0, 1, false), 3406 TV( F, 0, F, 0, HF, 0, 2, false), 3407 TV( F, 0, F, 0, HF, 0, 3, false), 3408 TV( F, 0, F, 0, HF, 0, 4, false), 3409 TV( F, 0, F, 0, HF, 0, 5, false), 3410 TV( F, 0, F, 0, HF, 0, 6, false), 3411 TV( F, 0, F, 0, HF, 0, 7, false), 3412 TV( F, 0, F, 0, HF, 0, 8, false), 3413 TV( F, 0, F, 0, HF, 0, 9, false), 3414 TV( F, 0, F, 0, HF, 0, 10, false), 3415 TV( F, 0, F, 0, HF, 0, 11, false), 3416 TV( F, 0, F, 0, HF, 0, 12, false), 3417 TV( F, 0, F, 0, HF, 0, 13, false), 3418 TV( F, 0, F, 0, HF, 0, 14, false), 3419 TV( F, 0, F, 0, HF, 0, 15, false), 3420 3421 /* These meet the requirements, but they specify a subnr that is part of 3422 * the next register. It is currently not possible to specify a subnr of 3423 * 32 for the B and UB values because brw_reg::subnr is only 5 bits. 3424 */ 3425 TV( F, 16, F, 0, HF, 0, 0, false), 3426 TV( F, 0, F, 16, HF, 0, 0, false), 3427 TV( F, 0, F, 0, HF, 0, 16, false), 3428 3429 TV( D, 16, D, 0, B, 0, 0, false), 3430 TV( D, 0, D, 16, B, 0, 0, false), 3431 }; 3432 3433 #undef TV 3434 3435 for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) { 3436 struct brw_reg dst = 3437 retype(brw_vec8_grf( 0, 0), test_vectors[i].dst_type); 3438 struct brw_reg src0 = 3439 retype(brw_vec8_grf(16, 0), test_vectors[i].src0_type); 3440 struct brw_reg src1 = 3441 retype(brw_vec8_grf(32, 0), test_vectors[i].src1_src2_type); 3442 struct brw_reg src2 = 3443 retype(brw_vec8_grf(48, 0), test_vectors[i].src1_src2_type); 3444 3445 /* subnr for DPAS is in units of datatype precision instead of bytes as 3446 * it is for every other instruction. Set the value by hand instead of 3447 * using byte_offset() or similar. 3448 */ 3449 dst.subnr = test_vectors[i].dst_subnr; 3450 src0.subnr = test_vectors[i].src0_subnr; 3451 src1.subnr = test_vectors[i].src1_subnr; 3452 src2.subnr = test_vectors[i].src2_subnr; 3453 3454 brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2); 3455 3456 EXPECT_EQ(test_vectors[i].expected_result, validate(p)) << 3457 "test vector index = " << i; 3458 3459 clear_instructions(p); 3460 } 3461 } 3462