xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/misched-matrix.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
2*9880d681SAndroid Build Coastguard Worker; RUN:          -misched-topdown -verify-machineinstrs \
3*9880d681SAndroid Build Coastguard Worker; RUN:     | FileCheck %s -check-prefix=TOPDOWN
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
5*9880d681SAndroid Build Coastguard Worker; RUN:          -misched=ilpmin -verify-machineinstrs \
6*9880d681SAndroid Build Coastguard Worker; RUN:     | FileCheck %s -check-prefix=ILPMIN
7*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
8*9880d681SAndroid Build Coastguard Worker; RUN:          -misched=ilpmax -verify-machineinstrs \
9*9880d681SAndroid Build Coastguard Worker; RUN:     | FileCheck %s -check-prefix=ILPMAX
10*9880d681SAndroid Build Coastguard Worker;
11*9880d681SAndroid Build Coastguard Worker; Verify that the MI scheduler minimizes register pressure for a
12*9880d681SAndroid Build Coastguard Worker; uniform set of bottom-up subtrees (unrolled matrix multiply).
13*9880d681SAndroid Build Coastguard Worker;
14*9880d681SAndroid Build Coastguard Worker; For current top-down heuristics, ensure that some folded imulls have
15*9880d681SAndroid Build Coastguard Worker; been reordered with the stores. This tests the scheduler's cheap
16*9880d681SAndroid Build Coastguard Worker; alias analysis ability (that doesn't require any AliasAnalysis pass).
17*9880d681SAndroid Build Coastguard Worker;
18*9880d681SAndroid Build Coastguard Worker; TOPDOWN-LABEL: %for.body
19*9880d681SAndroid Build Coastguard Worker; TOPDOWN: movl %{{.*}}, (
20*9880d681SAndroid Build Coastguard Worker; TOPDOWN: imull {{[0-9]*}}(
21*9880d681SAndroid Build Coastguard Worker; TOPDOWN: movl %{{.*}}, 4(
22*9880d681SAndroid Build Coastguard Worker; TOPDOWN: imull {{[0-9]*}}(
23*9880d681SAndroid Build Coastguard Worker; TOPDOWN: movl %{{.*}}, 8(
24*9880d681SAndroid Build Coastguard Worker; TOPDOWN: movl %{{.*}}, 12(
25*9880d681SAndroid Build Coastguard Worker; TOPDOWN-LABEL: %for.end
26*9880d681SAndroid Build Coastguard Worker;
27*9880d681SAndroid Build Coastguard Worker; For -misched=ilpmin, verify that each expression subtree is
28*9880d681SAndroid Build Coastguard Worker; scheduled independently, and that the imull/adds are interleaved.
29*9880d681SAndroid Build Coastguard Worker;
30*9880d681SAndroid Build Coastguard Worker; ILPMIN-LABEL: %for.body
31*9880d681SAndroid Build Coastguard Worker; ILPMIN: movl %{{.*}}, (
32*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
33*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
34*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
35*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
36*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
37*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
38*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
39*9880d681SAndroid Build Coastguard Worker; ILPMIN: movl %{{.*}}, 4(
40*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
41*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
42*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
43*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
44*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
45*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
46*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
47*9880d681SAndroid Build Coastguard Worker; ILPMIN: movl %{{.*}}, 8(
48*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
49*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
50*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
51*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
52*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
53*9880d681SAndroid Build Coastguard Worker; ILPMIN: imull
54*9880d681SAndroid Build Coastguard Worker; ILPMIN: addl
55*9880d681SAndroid Build Coastguard Worker; ILPMIN: movl %{{.*}}, 12(
56*9880d681SAndroid Build Coastguard Worker; ILPMIN-LABEL: %for.end
57*9880d681SAndroid Build Coastguard Worker;
58*9880d681SAndroid Build Coastguard Worker; For -misched=ilpmax, verify that each expression subtree is
59*9880d681SAndroid Build Coastguard Worker; scheduled independently, and that the imull/adds are clustered.
60*9880d681SAndroid Build Coastguard Worker;
61*9880d681SAndroid Build Coastguard Worker; ILPMAX-LABEL: %for.body
62*9880d681SAndroid Build Coastguard Worker; ILPMAX: movl %{{.*}}, (
63*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
64*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
65*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
66*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
67*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
68*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
69*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
70*9880d681SAndroid Build Coastguard Worker; ILPMAX: movl %{{.*}}, 4(
71*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
72*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
73*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
74*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
75*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
76*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
77*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
78*9880d681SAndroid Build Coastguard Worker; ILPMAX: movl %{{.*}}, 8(
79*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
80*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
81*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
82*9880d681SAndroid Build Coastguard Worker; ILPMAX: imull
83*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
84*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
85*9880d681SAndroid Build Coastguard Worker; ILPMAX: addl
86*9880d681SAndroid Build Coastguard Worker; ILPMAX: movl %{{.*}}, 12(
87*9880d681SAndroid Build Coastguard Worker; ILPMAX-LABEL: %for.end
88*9880d681SAndroid Build Coastguard Worker
; mmult: unrolled integer matrix multiply used as the scheduling workload.
; Each trip of %for.body handles one row index (%indvars.iv) and stores
;   m3[iv][j] = m1[iv][0]*m2[0][j] + m1[iv][1]*m2[1][j]
;             + m1[iv][2]*m2[2][j] + m1[iv][3]*m2[3][j]      for j = 0..3.
; All three pointer arguments are declared noalias nocapture.
; NOTE(review): the llc command lines at the top of this file pass
; -pre-RA-sched=source, so the statement order below is presumably part of
; what the expected schedules depend on; avoid reordering - TODO confirm.
89*9880d681SAndroid Build Coastguard Workerdefine void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
90*9880d681SAndroid Build Coastguard Worker[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
91*9880d681SAndroid Build Coastguard Workerentry:
92*9880d681SAndroid Build Coastguard Worker  br label %for.body
93*9880d681SAndroid Build Coastguard Worker
94*9880d681SAndroid Build Coastguard Workerfor.body:                              ; preds = %for.body, %entry
  ; Row index: 0 on entry, %indvars.iv.next on the back edge.
95*9880d681SAndroid Build Coastguard Worker  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Operands for output column 0: m1[iv][k] paired with m2[k][0], k = 0..3.
  ; m2[3][0] is only addressed here (%arrayidx12.3); its load, %tmp7, is
  ; placed after every other load in this block.
96*9880d681SAndroid Build Coastguard Worker  %arrayidx8 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 0
97*9880d681SAndroid Build Coastguard Worker  %tmp = load i32, i32* %arrayidx8, align 4
98*9880d681SAndroid Build Coastguard Worker  %arrayidx12 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 0
99*9880d681SAndroid Build Coastguard Worker  %tmp1 = load i32, i32* %arrayidx12, align 4
100*9880d681SAndroid Build Coastguard Worker  %arrayidx8.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 1
101*9880d681SAndroid Build Coastguard Worker  %tmp2 = load i32, i32* %arrayidx8.1, align 4
102*9880d681SAndroid Build Coastguard Worker  %arrayidx12.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 0
103*9880d681SAndroid Build Coastguard Worker  %tmp3 = load i32, i32* %arrayidx12.1, align 4
104*9880d681SAndroid Build Coastguard Worker  %arrayidx8.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 2
105*9880d681SAndroid Build Coastguard Worker  %tmp4 = load i32, i32* %arrayidx8.2, align 4
106*9880d681SAndroid Build Coastguard Worker  %arrayidx12.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 0
107*9880d681SAndroid Build Coastguard Worker  %tmp5 = load i32, i32* %arrayidx12.2, align 4
108*9880d681SAndroid Build Coastguard Worker  %arrayidx8.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 3
109*9880d681SAndroid Build Coastguard Worker  %tmp6 = load i32, i32* %arrayidx8.3, align 4
110*9880d681SAndroid Build Coastguard Worker  %arrayidx12.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 0
  ; Operands for output column 1: the m1 row is loaded again through the same
  ; four pointers and paired with m2[k][1].
111*9880d681SAndroid Build Coastguard Worker  %tmp8 = load i32, i32* %arrayidx8, align 4
112*9880d681SAndroid Build Coastguard Worker  %arrayidx12.137 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 1
113*9880d681SAndroid Build Coastguard Worker  %tmp9 = load i32, i32* %arrayidx12.137, align 4
114*9880d681SAndroid Build Coastguard Worker  %tmp10 = load i32, i32* %arrayidx8.1, align 4
115*9880d681SAndroid Build Coastguard Worker  %arrayidx12.1.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 1
116*9880d681SAndroid Build Coastguard Worker  %tmp11 = load i32, i32* %arrayidx12.1.1, align 4
117*9880d681SAndroid Build Coastguard Worker  %tmp12 = load i32, i32* %arrayidx8.2, align 4
118*9880d681SAndroid Build Coastguard Worker  %arrayidx12.2.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 1
119*9880d681SAndroid Build Coastguard Worker  %tmp13 = load i32, i32* %arrayidx12.2.1, align 4
120*9880d681SAndroid Build Coastguard Worker  %tmp14 = load i32, i32* %arrayidx8.3, align 4
121*9880d681SAndroid Build Coastguard Worker  %arrayidx12.3.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 1
122*9880d681SAndroid Build Coastguard Worker  %tmp15 = load i32, i32* %arrayidx12.3.1, align 4
  ; Operands for output column 2: m1 row loaded again, paired with m2[k][2].
123*9880d681SAndroid Build Coastguard Worker  %tmp16 = load i32, i32* %arrayidx8, align 4
124*9880d681SAndroid Build Coastguard Worker  %arrayidx12.239 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 2
125*9880d681SAndroid Build Coastguard Worker  %tmp17 = load i32, i32* %arrayidx12.239, align 4
126*9880d681SAndroid Build Coastguard Worker  %tmp18 = load i32, i32* %arrayidx8.1, align 4
127*9880d681SAndroid Build Coastguard Worker  %arrayidx12.1.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 2
128*9880d681SAndroid Build Coastguard Worker  %tmp19 = load i32, i32* %arrayidx12.1.2, align 4
129*9880d681SAndroid Build Coastguard Worker  %tmp20 = load i32, i32* %arrayidx8.2, align 4
130*9880d681SAndroid Build Coastguard Worker  %arrayidx12.2.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 2
131*9880d681SAndroid Build Coastguard Worker  %tmp21 = load i32, i32* %arrayidx12.2.2, align 4
132*9880d681SAndroid Build Coastguard Worker  %tmp22 = load i32, i32* %arrayidx8.3, align 4
133*9880d681SAndroid Build Coastguard Worker  %arrayidx12.3.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 2
134*9880d681SAndroid Build Coastguard Worker  %tmp23 = load i32, i32* %arrayidx12.3.2, align 4
  ; Operands for output column 3: m1 row loaded again, paired with m2[k][3].
135*9880d681SAndroid Build Coastguard Worker  %tmp24 = load i32, i32* %arrayidx8, align 4
136*9880d681SAndroid Build Coastguard Worker  %arrayidx12.341 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 3
137*9880d681SAndroid Build Coastguard Worker  %tmp25 = load i32, i32* %arrayidx12.341, align 4
138*9880d681SAndroid Build Coastguard Worker  %tmp26 = load i32, i32* %arrayidx8.1, align 4
139*9880d681SAndroid Build Coastguard Worker  %arrayidx12.1.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 3
140*9880d681SAndroid Build Coastguard Worker  %tmp27 = load i32, i32* %arrayidx12.1.3, align 4
141*9880d681SAndroid Build Coastguard Worker  %tmp28 = load i32, i32* %arrayidx8.2, align 4
142*9880d681SAndroid Build Coastguard Worker  %arrayidx12.2.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 3
143*9880d681SAndroid Build Coastguard Worker  %tmp29 = load i32, i32* %arrayidx12.2.3, align 4
144*9880d681SAndroid Build Coastguard Worker  %tmp30 = load i32, i32* %arrayidx8.3, align 4
145*9880d681SAndroid Build Coastguard Worker  %arrayidx12.3.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 3
146*9880d681SAndroid Build Coastguard Worker  %tmp31 = load i32, i32* %arrayidx12.3.3, align 4
  ; Deferred load of m2[3][0], the last load in the block.
147*9880d681SAndroid Build Coastguard Worker  %tmp7 = load i32, i32* %arrayidx12.3, align 4
  ; Sixteen products, four per output column, each written as
  ; (m2 element) * (m1 element).
148*9880d681SAndroid Build Coastguard Worker  %mul = mul nsw i32 %tmp1, %tmp
149*9880d681SAndroid Build Coastguard Worker  %mul.1 = mul nsw i32 %tmp3, %tmp2
150*9880d681SAndroid Build Coastguard Worker  %mul.2 = mul nsw i32 %tmp5, %tmp4
151*9880d681SAndroid Build Coastguard Worker  %mul.3 = mul nsw i32 %tmp7, %tmp6
152*9880d681SAndroid Build Coastguard Worker  %mul.138 = mul nsw i32 %tmp9, %tmp8
153*9880d681SAndroid Build Coastguard Worker  %mul.1.1 = mul nsw i32 %tmp11, %tmp10
154*9880d681SAndroid Build Coastguard Worker  %mul.2.1 = mul nsw i32 %tmp13, %tmp12
155*9880d681SAndroid Build Coastguard Worker  %mul.3.1 = mul nsw i32 %tmp15, %tmp14
156*9880d681SAndroid Build Coastguard Worker  %mul.240 = mul nsw i32 %tmp17, %tmp16
157*9880d681SAndroid Build Coastguard Worker  %mul.1.2 = mul nsw i32 %tmp19, %tmp18
158*9880d681SAndroid Build Coastguard Worker  %mul.2.2 = mul nsw i32 %tmp21, %tmp20
159*9880d681SAndroid Build Coastguard Worker  %mul.3.2 = mul nsw i32 %tmp23, %tmp22
160*9880d681SAndroid Build Coastguard Worker  %mul.342 = mul nsw i32 %tmp25, %tmp24
161*9880d681SAndroid Build Coastguard Worker  %mul.1.3 = mul nsw i32 %tmp27, %tmp26
162*9880d681SAndroid Build Coastguard Worker  %mul.2.3 = mul nsw i32 %tmp29, %tmp28
163*9880d681SAndroid Build Coastguard Worker  %mul.3.3 = mul nsw i32 %tmp31, %tmp30
  ; Per-column reduction: each column's four products are summed as a serial
  ; chain of three adds, and the four chains share no values with each other.
164*9880d681SAndroid Build Coastguard Worker  %add.1 = add nsw i32 %mul.1, %mul
165*9880d681SAndroid Build Coastguard Worker  %add.2 = add nsw i32 %mul.2, %add.1
166*9880d681SAndroid Build Coastguard Worker  %add.3 = add nsw i32 %mul.3, %add.2
167*9880d681SAndroid Build Coastguard Worker  %add.1.1 = add nsw i32 %mul.1.1, %mul.138
168*9880d681SAndroid Build Coastguard Worker  %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
169*9880d681SAndroid Build Coastguard Worker  %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
170*9880d681SAndroid Build Coastguard Worker  %add.1.2 = add nsw i32 %mul.1.2, %mul.240
171*9880d681SAndroid Build Coastguard Worker  %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
172*9880d681SAndroid Build Coastguard Worker  %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
173*9880d681SAndroid Build Coastguard Worker  %add.1.3 = add nsw i32 %mul.1.3, %mul.342
174*9880d681SAndroid Build Coastguard Worker  %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
175*9880d681SAndroid Build Coastguard Worker  %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
  ; Store the four column sums to m3[iv][0..3], in column order.
176*9880d681SAndroid Build Coastguard Worker  %arrayidx16 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 0
177*9880d681SAndroid Build Coastguard Worker  store i32 %add.3, i32* %arrayidx16, align 4
178*9880d681SAndroid Build Coastguard Worker  %arrayidx16.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 1
179*9880d681SAndroid Build Coastguard Worker  store i32 %add.3.1, i32* %arrayidx16.1, align 4
180*9880d681SAndroid Build Coastguard Worker  %arrayidx16.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 2
181*9880d681SAndroid Build Coastguard Worker  store i32 %add.3.2, i32* %arrayidx16.2, align 4
182*9880d681SAndroid Build Coastguard Worker  %arrayidx16.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 3
183*9880d681SAndroid Build Coastguard Worker  store i32 %add.3.3, i32* %arrayidx16.3, align 4
  ; Advance the row index; leave the loop once its 32-bit truncation equals 4.
184*9880d681SAndroid Build Coastguard Worker  %indvars.iv.next = add i64 %indvars.iv, 1
185*9880d681SAndroid Build Coastguard Worker  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
186*9880d681SAndroid Build Coastguard Worker  %exitcond = icmp eq i32 %lftr.wideiv, 4
187*9880d681SAndroid Build Coastguard Worker  br i1 %exitcond, label %for.end, label %for.body
188*9880d681SAndroid Build Coastguard Worker
189*9880d681SAndroid Build Coastguard Workerfor.end:                                        ; preds = %for.body
190*9880d681SAndroid Build Coastguard Worker  ret void
191*9880d681SAndroid Build Coastguard Worker}
192