xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/tests/test_builder.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 #include "helpers.h"
7 
8 using namespace aco;
9 
/*
 * builder.v_mul_imm: calls bld.v_mul_imm() on inputs[0] (named %a in the
 * patterns) with a series of constant multipliers, once per gfx level from
 * GFX8 to GFX10, and hands each result to writeout() under its own index.
 *
 * NOTE(review): the line comments that start with '!', '>>' or '~gfx...!'
 * appear to be expected-output patterns read by the test harness rather than
 * ordinary remarks (confirm against the framework). Keep them verbatim and in
 * the same order as the writeout() calls they sit above; add explanatory text
 * only as block comments like this one.
 */
10 BEGIN_TEST(builder.v_mul_imm)
11    for (unsigned i = GFX8; i <= GFX10; i++) {
         /* The program is set up with two v1 inputs (%a, %b) and two s1 inputs
          * (%c, %d); only %a is used below. A gfx level for which setup_cs()
          * returns a false value is skipped. */
12       //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
13       if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
14          continue;
15 
16       /* simple optimizations */
17 
         /* Multiplier 0: the pattern expects the constant 0 to be written out
          * with no instruction emitted. */
18       //! p_unit_test 0, 0
19       writeout(0, bld.v_mul_imm(bld.def(v1), inputs[0], 0));
20 
         /* Multiplier 1: the pattern expects %a itself to be written out. */
21       //! p_unit_test 1, %a
22       writeout(1, bld.v_mul_imm(bld.def(v1), inputs[0], 1));
23 
         /* 4 = 1 << 2: a single left shift by 2 is expected. */
24       //! v1: %res2 = v_lshlrev_b32 2, %a
25       //! p_unit_test 2, %res2
26       writeout(2, bld.v_mul_imm(bld.def(v1), inputs[0], 4));
27 
         /* 2147483648 = 1 << 31: the largest 32-bit power of two, expected to
          * become a left shift by 31. */
28       //! v1: %res3 = v_lshlrev_b32 31, %a
29       //! p_unit_test 3, %res3
30       writeout(3, bld.v_mul_imm(bld.def(v1), inputs[0], 2147483648u));
31 
32       /* single lshl+add/sub */
33 
         /* 9 = 8 + 1: gfx8 is expected to shift by 3 and then add %a with a
          * separate v_add_co_u32; gfx9 and gfx10 are expected to use one fused
          * v_lshl_add_u32. */
34       //~gfx8! v1: %res4_tmp = v_lshlrev_b32 3, %a
35       //~gfx8! v1: %res4,  s2: %_ = v_add_co_u32 %res4_tmp, %a
36       //~gfx(9|10)! v1: %res4 = v_lshl_add_u32 %a, 3, %a
37       //! p_unit_test 4, %res4
38       writeout(4, bld.v_mul_imm(bld.def(v1), inputs[0], 9));
39 
         /* 7 = 8 - 1: gfx8 and gfx9 are expected to shift by 3 and subtract %a
          * (gfx8 with the carry-out form v_sub_co_u32, gfx9 with v_sub_u32);
          * gfx10 is expected to multiply by 7 directly. */
40       //~gfx[89]! v1: %res5_tmp = v_lshlrev_b32 3, %a
41       //~gfx8! v1: %res5,  s2: %_ = v_sub_co_u32 %res5_tmp, %a
42       //~gfx9! v1: %res5 = v_sub_u32 %res5_tmp, %a
43       //~gfx10! v1: %res5 = v_mul_lo_u32 7, %a
44       //! p_unit_test 5, %res5
45       writeout(5, bld.v_mul_imm(bld.def(v1), inputs[0], 7));
46 
47       /* lshl+add optimization with literal */
48 
         /* 4 | 64 = 0x44, two set bits: gfx8 is expected to emit one shift per
          * bit plus an add, gfx9 a shift followed by v_lshl_add_u32, and gfx10
          * a multiply by the constant. */
49       //~gfx8! v1: %res6_tmp0 = v_lshlrev_b32 2, %a
50       //~gfx8! v1: %res6_tmp1 = v_lshlrev_b32 6, %a
51       //~gfx8! v1: %res6,  s2: %_ = v_add_co_u32 %res6_tmp1, %res6_tmp0
52       //~gfx9! v1: %res6_tmp = v_lshlrev_b32 2, %a
53       //~gfx9! v1: %res6 = v_lshl_add_u32 %a, 6, %res6_tmp
54       //~gfx10! v1: %res6 = v_mul_lo_u32 0x44, %a
55       //! p_unit_test 6, %res6
56       writeout(6, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64));
57 
         /* 4 | 64 | 256 = 0x144, three set bits: gfx8 is now expected to copy
          * the constant into an s1 temporary and multiply, while gfx9 still
          * expands to a shift followed by two v_lshl_add_u32. */
58       //~gfx8! s1: %res7_tmp = p_parallelcopy 0x144
59       //~gfx8! v1: %res7 = v_mul_lo_u32 %res7_tmp, %a
60       //~gfx9! v1: %res7_tmp0 = v_lshlrev_b32 2, %a
61       //~gfx9! v1: %res7_tmp1 = v_lshl_add_u32 %a, 6, %res7_tmp0
62       //~gfx9! v1: %res7 = v_lshl_add_u32 %a, 8, %res7_tmp1
63       //~gfx10! v1: %res7 = v_mul_lo_u32 0x144, %a
64       //! p_unit_test 7, %res7
65       writeout(7, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256));
66 
         /* 4 | 64 | 256 | 2048 = 0x944, four set bits: same split as the
          * previous case, with the expected gfx9 chain one v_lshl_add_u32
          * longer. */
67       //~gfx8! s1: %res8_tmp = p_parallelcopy 0x944
68       //~gfx8! v1: %res8 = v_mul_lo_u32 %res8_tmp, %a
69       //~gfx9! v1: %res8_tmp0 = v_lshlrev_b32 2, %a
70       //~gfx9! v1: %res8_tmp1 = v_lshl_add_u32 %a, 6, %res8_tmp0
71       //~gfx9! v1: %res8_tmp2 = v_lshl_add_u32 %a, 8, %res8_tmp1
72       //~gfx9! v1: %res8 = v_lshl_add_u32 %a, 11, %res8_tmp2
73       //~gfx10! v1: %res8 = v_mul_lo_u32 0x944, %a
74       //! p_unit_test 8, %res8
75       writeout(8, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256 | 2048));
76 
77       /* lshl+add optimization with inline constant */
78 
         /* NOTE(review): the multipliers in this section (6, 14, 30) are
          * presumably small enough to be encoded as inline constants, unlike
          * 0x44 and larger above - confirm against the ISA documentation. */

         /* 2 | 4 = 6, two set bits: expected output has the same shape as the
          * 0x44 case (gfx8 two shifts plus add, gfx9 shift plus
          * v_lshl_add_u32, gfx10 multiply). */
79       //~gfx8! v1: %res9_tmp0 = v_lshlrev_b32 1, %a
80       //~gfx8! v1: %res9_tmp1 = v_lshlrev_b32 2, %a
81       //~gfx8! v1: %res9,  s2: %_ = v_add_co_u32 %res9_tmp1, %res9_tmp0
82       //~gfx9! v1: %res9_tmp0 = v_lshlrev_b32 1, %a
83       //~gfx9! v1: %res9 = v_lshl_add_u32 %a, 2, %res9_tmp0
84       //~gfx10! v1: %res9 = v_mul_lo_u32 6, %a
85       //! p_unit_test 9, %res9
86       writeout(9, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4));
87 
         /* 2 | 4 | 8 = 14, three set bits: only gfx9 is expected to expand to
          * a shift chain; gfx8 and gfx10 are expected to multiply by 14
          * directly (no s1 temporary on gfx8, unlike the 0x144 case). */
88       //~gfx(8|10)! v1: %res10 = v_mul_lo_u32 14, %a
89       //~gfx9! v1: %res10_tmp0 = v_lshlrev_b32 1, %a
90       //~gfx9! v1: %res10_tmp1 = v_lshl_add_u32 %a, 2, %res10_tmp0
91       //~gfx9! v1: %res10 = v_lshl_add_u32 %a, 3, %res10_tmp1
92       //! p_unit_test 10, %res10
93       writeout(10, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8));
94 
         /* 2 | 4 | 8 | 16 = 30, four set bits: every gfx level is expected to
          * fall back to a single multiply. */
95       //! v1: %res11 = v_mul_lo_u32 30, %a
96       //! p_unit_test 11, %res11
97       writeout(11, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8 | 16));
98 
         /* Closes the test program for this gfx level. NOTE(review):
          * presumably this is where the generated code is printed for
          * comparison with the patterns above - confirm in helpers.h. */
99       finish_opt_test();
100    }
101 END_TEST
102