xref: /XiangShan/src/main/scala/xiangshan/backend/decode/FusionDecoder.scala (revision a1ea7f76add43b40af78084f7f646a0010120cd7)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util.BitPat.bitPatToUInt
22import chisel3.util._
23import xiangshan._
24import utils._
25
/** Common scaffolding for one two-instruction fusion pattern.
  *
  * A concrete case inspects two adjacent instruction words, reports through
  * [[isValid]] whether they match its pattern, and supplies through [[target]]
  * the control signals of the single instruction that replaces them.
  *
  * @param pair   the two adjacent raw instruction words with their valid bits; must have length 2
  * @param csPair optional already-decoded control signals of the same two instructions;
  *               not used by the base class itself, only by subclasses that need them
  */
abstract class BaseFusionCase(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]] = None)(implicit p: Parameters)
  extends DecodeUnitConstants {
  require(pair.length == 2, s"a fusion case needs exactly 2 instructions, got ${pair.length}")
  require(csPair.forall(_.length == 2), "csPair, when given, must hold exactly 2 decoded instructions")

  /** Raw bits of the first (index 0) and second (index 1) instruction. */
  protected def instr: Seq[UInt] = pair.map(_.bits)
  /** High when both instructions of the pair are valid. */
  protected def pairValid: Bool = VecInit(pair.map(_.valid)).asUInt().andR()

  // Register-index fields of the first instruction.
  protected def instr1Rs1: UInt = instr(0)(RS1_MSB, RS1_LSB)
  protected def instr1Rs2: UInt = instr(0)(RS2_MSB, RS2_LSB)
  protected def instr1Rd: UInt = instr(0)(RD_MSB, RD_LSB)
  // Register-index fields of the second instruction.
  protected def instr2Rs1: UInt = instr(1)(RS1_MSB, RS1_LSB)
  protected def instr2Rs2: UInt = instr(1)(RS2_MSB, RS2_LSB)
  protected def instr2Rd: UInt = instr(1)(RD_MSB, RD_LSB)

  /** Both instructions write the same destination register. */
  protected def withSameDest: Bool = instr1Rd === instr2Rd
  /** The second instruction reads the first one's destination through rs1. */
  protected def destToRs1: Bool = instr1Rd === instr2Rs1
  /** The second instruction reads the first one's destination through rs2. */
  protected def destToRs2: Bool = instr1Rd === instr2Rs2

  /** Builds control signals for the instruction whose decode-table key equals `pat`.
    *
    * The entry is looked up in the combined XDecode / X64Decode / BDecode tables
    * while the hardware is being generated, so a missing entry is a generator bug
    * and is reported with the offending pattern instead of a bare "head of empty list".
    *
    * @param pat decode-table key of the instruction to use as the template
    * @return a fresh wire decoded from that entry, with `ldest` set to the first
    *         instruction's rd; every field the decode does not drive is DontCare
    */
  protected def getBaseCS(pat: BitPat): CtrlSignals = {
    val allDecodeTable = XDecode.table ++ X64Decode.table ++ BDecode.table
    val baseTable = allDecodeTable
      .collectFirst { case (tablePat, signals) if tablePat == pat => signals }
      .getOrElse(throw new IllegalArgumentException(s"no decode table entry for fusion base pattern $pat"))
    val cs = Wire(new CtrlSignals)
    cs := DontCare
    cs.decode(baseTable)
    // For simple instruction fusions, we assume their destination registers are the same.
    cs.ldest := instr1Rd
    cs
  }

  /** High when the pair matches this fusion pattern (validity of the pair is checked by the user). */
  def isValid: Bool
  // TODO: optimize timing
  /** Control signals of the fused instruction; only meaningful while [[isValid]] is high. */
  def target: CtrlSignals
  // clear the next instruction
  // def needClear: Boolean = true
  /** Short identifier of this pattern, used to name its performance counter. */
  def fusionName: String
}
60
61// Case: clear upper 32 bits / get lower 32 bits
62// Source: `slli r1, r0, 32` + `srli r1, r1, 32`
63// Target: `add.uw r1, r0, zero` (pseudo instruction: `zext.w r1, r0`)
class FusedAdduw(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  // Both shifts must carry a 6-bit shamt (bits 25:20) of exactly 32.
  def inst1Cond: Bool = instr(0)(25, 20) === 32.U && instr(0) === Instructions.SLLI
  def inst2Cond: Bool = instr(1)(25, 20) === 32.U && instr(1) === Instructions.SRLI

  // The right shift has to consume the left shift's result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // Fused form: ADDU_W of the original source with register index 0 as second operand.
  def target: CtrlSignals = {
    val fused = getBaseCS(Instructions.ADDU_W)
    fused.lsrc(1) := 0.U
    fused.lsrc(0) := instr1Rs1
    fused
  }

  def fusionName: String = "slli32_srli32"
}
78
79// Case: clear upper 48 bits / get lower 16 bits
80// Source: `slli r1, r0, 48` + `srli r1, r1, 48`
81// Target: `zext.h r1, r0`
class FusedZexth(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  // Both shifts must carry a 6-bit shamt (bits 25:20) of exactly 48.
  // These two conditions are overridable so that other shift pairs can reuse the target.
  def inst1Cond: Bool = instr(0)(25, 20) === 48.U && instr(0) === Instructions.SLLI
  def inst2Cond: Bool = instr(1)(25, 20) === 48.U && instr(1) === Instructions.SRLI

  // The right shift has to consume the left shift's result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // Fused form: ZEXT_H reading the first instruction's source register.
  def target: CtrlSignals = {
    val fused = getBaseCS(Instructions.ZEXT_H)
    fused.lsrc(0) := instr1Rs1
    fused
  }

  def fusionName: String = "slli48_srli48"
}
95
96// Another case of Zext.h
97// Source: `slliw r1, r0, 16` + `srliw r1, r1, 16`
98// Target: `zext.h r1, r0`
class FusedZexth1(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends FusedZexth(pair) {
  override def fusionName: String = "slliw16_srliw16"

  // Word shifts use a 5-bit shamt (bits 24:20); both must be 16.
  // isValid and target are inherited unchanged from FusedZexth.
  override def inst1Cond: Bool = instr(0)(24, 20) === 16.U && instr(0) === Instructions.SLLIW
  override def inst2Cond: Bool = instr(1)(24, 20) === 16.U && instr(1) === Instructions.SRLIW
}
105
106// Case: sign-extend a 16-bit number
107// Source: `slliw r1, r0, 16` + `sraiw r1, r1, 16`
108// Target: `sext.h r1, r0`
class FusedSexth(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  // Word shifts use a 5-bit shamt (bits 24:20); both must be 16.
  def inst1Cond: Bool = instr(0)(24, 20) === 16.U && instr(0) === Instructions.SLLIW
  def inst2Cond: Bool = instr(1)(24, 20) === 16.U && instr(1) === Instructions.SRAIW

  // The arithmetic right shift has to consume the left shift's result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // Fused form: SEXT_H reading the first instruction's source register.
  def target: CtrlSignals = {
    val fused = getBaseCS(Instructions.SEXT_H)
    fused.lsrc(0) := instr1Rs1
    fused
  }

  def fusionName: String = "slliw16_sraiw16"
}
122
123// Case: shift left by one and add
124// Source: `slli r1, r0, 1` + `add r1, r1, r2`
125// Target: `sh1add r1, r0, r2`
class FusedSh1add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `slli` with a shift amount (bits 25:20) of 1. */
  def inst1Cond: Bool = instr(0) === Instructions.SLLI && instr(0)(25, 20) === 1.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH1ADD of the shift's source and whichever `add` operand is not the intermediate. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH1ADD)
    cs.lsrc(0) := instr1Rs1
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "slli1_add"
}
140
141// Case: shift left by two and add
142// Source: `slli r1, r0, 2` + `add r1, r1, r2`
143// Target: `sh2add r1, r0, r2`
class FusedSh2add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `slli` with a shift amount (bits 25:20) of 2. */
  def inst1Cond: Bool = instr(0) === Instructions.SLLI && instr(0)(25, 20) === 2.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH2ADD of the shift's source and whichever `add` operand is not the intermediate. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH2ADD)
    cs.lsrc(0) := instr1Rs1
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "slli2_add"
}
158
159// Case: shift left by three and add
160// Source: `slli r1, r0, 3` + `add r1, r1, r2`
161// Target: `sh3add r1, r0, r2`
class FusedSh3add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `slli` with a shift amount (bits 25:20) of 3. */
  def inst1Cond: Bool = instr(0) === Instructions.SLLI && instr(0)(25, 20) === 3.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD of the shift's source and whichever `add` operand is not the intermediate. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    cs.lsrc(0) := instr1Rs1
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "slli3_add"
}
176
177// Case: shift zero-extended word left by one
// Source: `slli r1, r0, 32` + `srli r1, r1, 31`
179// Target: `szewl1 r1, r0` (customized internal opcode)
class FusedSzewl1(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  // Left shift by 32 followed by a logical right shift by 31 (6-bit shamt, bits 25:20).
  def inst1Cond: Bool = instr(0)(25, 20) === 32.U && instr(0) === Instructions.SLLI
  def inst2Cond: Bool = instr(1)(25, 20) === 31.U && instr(1) === Instructions.SRLI

  // The right shift has to consume the left shift's result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // The ZEXT_H decode entry serves only as a template; its fuOpType is then
  // overwritten with the internal szewl1 operation.
  def target: CtrlSignals = {
    val fused = getBaseCS(Instructions.ZEXT_H)
    fused.lsrc(0) := instr1Rs1
    fused.fuOpType := ALUOpType.szewl1
    fused
  }

  def fusionName: String = "slli32_srli31"
}
195
196// Case: shift zero-extended word left by two
// Source: `slli r1, r0, 32` + `srli r1, r1, 30`
198// Target: `szewl2 r1, r0` (customized internal opcode)
class FusedSzewl2(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  // Left shift by 32 followed by a logical right shift by 30 (6-bit shamt, bits 25:20).
  def inst1Cond: Bool = instr(0)(25, 20) === 32.U && instr(0) === Instructions.SLLI
  def inst2Cond: Bool = instr(1)(25, 20) === 30.U && instr(1) === Instructions.SRLI

  // The right shift has to consume the left shift's result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // The ZEXT_H decode entry serves only as a template; its fuOpType is then
  // overwritten with the internal szewl2 operation.
  def target: CtrlSignals = {
    val fused = getBaseCS(Instructions.ZEXT_H)
    fused.lsrc(0) := instr1Rs1
    fused.fuOpType := ALUOpType.szewl2
    fused
  }

  def fusionName: String = "slli32_srli30"
}
214
215// Case: get the second byte
216// Source: `srli r1, r0, 8` + `andi r1, r1, 255`
217// Target: `byte2 r1, r0` (customized internal opcode)
class FusedByte2(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  // Logical right shift by 8 (shamt in bits 25:20), then AND with the
  // 12-bit immediate 255 (bits 31:20).
  def inst1Cond: Bool = instr(0)(25, 20) === 8.U && instr(0) === Instructions.SRLI
  def inst2Cond: Bool = instr(1)(31, 20) === 255.U && instr(1) === Instructions.ANDI

  // The mask has to consume the shift's result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // The ZEXT_H decode entry serves only as a template; its fuOpType is then
  // overwritten with the internal byte2 operation.
  def target: CtrlSignals = {
    val fused = getBaseCS(Instructions.ZEXT_H)
    fused.lsrc(0) := instr1Rs1
    fused.fuOpType := ALUOpType.byte2
    fused
  }

  def fusionName: String = "srli8_andi255"
}
233
234// Case: shift left by four and add
235// Source: `slli r1, r0, 4` + `add r1, r1, r2`
236// Target: `sh4add r1, r0, r2` (customized internal opcode)
class FusedSh4add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `slli` with a shift amount (bits 25:20) of 4. */
  def inst1Cond: Bool = instr(0) === Instructions.SLLI && instr(0)(25, 20) === 4.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD decode entry used as a template, with the operation swapped to sh4add. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    // replace the fuOpType with sh4add
    cs.fuOpType := ALUOpType.sh4add
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `add` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "slli4_add"
}
253
254// Case: shift right by 30 and add
255// Source: `srli r1, r0, 30` + `add r1, r1, r2`
256// Target: `sr30add r1, r0, r2` (customized internal opcode)
class FusedSr30add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `srli` with a shift amount (bits 25:20) of 30. */
  def inst1Cond: Bool = instr(0) === Instructions.SRLI && instr(0)(25, 20) === 30.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD decode entry used as a template, with the operation swapped to sr30add. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    // replace the fuOpType with sr30add
    cs.fuOpType := ALUOpType.sr30add
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `add` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "srli30_add"
}
273
274// Case: shift right by 31 and add
275// Source: `srli r1, r0, 31` + `add r1, r1, r2`
276// Target: `sr31add r1, r0, r2` (customized internal opcode)
class FusedSr31add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `srli` with a shift amount (bits 25:20) of 31. */
  def inst1Cond: Bool = instr(0) === Instructions.SRLI && instr(0)(25, 20) === 31.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD decode entry used as a template, with the operation swapped to sr31add. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    // replace the fuOpType with sr31add
    cs.fuOpType := ALUOpType.sr31add
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `add` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "srli31_add"
}
293
294// Case: shift right by 32 and add
295// Source: `srli r1, r0, 32` + `add r1, r1, r2`
296// Target: `sr32add r1, r0, r2` (customized internal opcode)
class FusedSr32add(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `srli` with a shift amount (bits 25:20) of 32. */
  def inst1Cond: Bool = instr(0) === Instructions.SRLI && instr(0)(25, 20) === 32.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the shifted value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD decode entry used as a template, with the operation swapped to sr32add. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    // replace the fuOpType with sr32add
    cs.fuOpType := ALUOpType.sr32add
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `add` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "srli32_add"
}
313
314// Case: add one if odd, otherwise unchanged
// Source: `andi r1, r0, 1` + `add r1, r1, r2`
316// Target: `oddadd r1, r0, r2` (customized internal opcode)
class FusedOddadd(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `andi` with a 12-bit immediate (bits 31:20) of 1. */
  def inst1Cond: Bool = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 1.U
  /** Second instruction is `add`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADD

  // Exactly one source of the `add` may be the intermediate result. When both
  // sources name it (`add r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the masked value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD decode entry used as a template, with the operation swapped to oddadd. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    // replace the fuOpType with oddadd
    cs.fuOpType := ALUOpType.oddadd
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `add` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "andi1_add"
}
333
334// Case: add one if odd (in word format), otherwise unchanged
// Source: `andi r1, r0, 1` + `addw r1, r1, r2`
336// Target: `oddaddw r1, r0, r2` (customized internal opcode)
class FusedOddaddw(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `andi` with a 12-bit immediate (bits 31:20) of 1. */
  def inst1Cond: Bool = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 1.U
  /** Second instruction is `addw`. */
  def inst2Cond: Bool = instr(1) === Instructions.ADDW

  // Exactly one source of the `addw` may be the intermediate result. When both
  // sources name it (`addw r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the masked value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** SH3ADD decode entry used as a template, with the operation swapped to oddaddw. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.SH3ADD)
    // replace the fuOpType with oddaddw
    cs.fuOpType := ALUOpType.oddaddw
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `addw` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "andi1_addw"
}
353
354// Case: addw and extract its lower 8 bits (fused into addwbyte)
class FusedAddwbyte(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
  extends BaseFusionCase(pair, csPair) {
  // This case works on decoded control signals, so they are mandatory here.
  require(csPair.isDefined)

  // The first instruction is recognised from its decoded form: an ALU
  // operation whose op type ALUOpType.isAddw accepts.
  def inst1Cond = {
    val firstCs = csPair.get(0)
    firstCs.fuType === FuType.alu && ALUOpType.isAddw(firstCs.fuOpType)
  }
  // The second instruction is `andi` with the 12-bit immediate 0xff.
  def inst2Cond = instr(1)(31, 20) === 0xff.U && instr(1) === Instructions.ANDI

  // The mask has to consume the first result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // Fused form: a copy of the first instruction's control signals with only
  // the operation replaced by addwbyte.
  def target: CtrlSignals = {
    val fused = WireInit(csPair.get(0))
    fused.fuOpType := ALUOpType.addwbyte
    fused
  }

  // NOTE(review): "andw" looks like a typo for "addw"; kept as is because the
  // string names a performance counter that tooling may depend on.
  def fusionName: String = "andw_andi255"
}
373
374// Case: addw and extract its lower 1 bit (fused into addwbit)
class FusedAddwbit(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
  extends FusedAddwbyte(pair, csPair) {
  // NOTE(review): "andw" looks like a typo for "addw"; kept as is because the
  // string names a performance counter that tooling may depend on.
  override def fusionName: String = "andw_andi1"

  // Same first-instruction check as FusedAddwbyte, but the mask immediate is 1.
  override def inst2Cond = instr(1)(31, 20) === 0x1.U && instr(1) === Instructions.ANDI

  // Fused form: a copy of the first instruction's control signals with only
  // the operation replaced by addwbit.
  override def target: CtrlSignals = {
    val fused = WireInit(csPair.get(0))
    fused.fuOpType := ALUOpType.addwbit
    fused
  }
}
386
387// Case: logic operation and extract its LSB
class FusedLogiclsb(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
  extends BaseFusionCase(pair, csPair) {
  // This case works on decoded control signals, so they are mandatory here.
  require(csPair.isDefined)

  // The first instruction is recognised from its decoded form: an ALU
  // operation whose op type ALUOpType.isLogic accepts.
  def inst1Cond = {
    val firstCs = csPair.get(0)
    firstCs.fuType === FuType.alu && ALUOpType.isLogic(firstCs.fuOpType)
  }
  // The second instruction is `andi` with the 12-bit immediate 1.
  def inst2Cond = instr(1)(31, 20) === 1.U && instr(1) === Instructions.ANDI

  // The mask has to consume the logic result and overwrite it.
  def isValid: Bool = Seq(inst1Cond, inst2Cond, withSameDest, destToRs1).reduce(_ && _)

  // Fused form: a copy of the first instruction's control signals whose op
  // type is mapped to its LSB variant by ALUOpType.logicToLSB.
  def target: CtrlSignals = {
    val firstCs = csPair.get(0)
    val fused = WireInit(firstCs)
    fused.fuOpType := ALUOpType.logicToLSB(firstCs.fuOpType)
    fused
  }

  def fusionName: String = "logic_andi1"
}
406
407// Case: OR(Cat(src1(63, 8), 0.U(8.W)), src2)
// Source: `andi r1, r0, -256` + `or r1, r1, r2`
class FusedOrh48(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFusionCase(pair) {
  /** First instruction is `andi` with the 12-bit immediate 0xf00 (i.e. -256). */
  def inst1Cond: Bool = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 0xf00.U
  /** Second instruction is `or`. */
  def inst2Cond: Bool = instr(1) === Instructions.OR

  // Exactly one source of the `or` may be the intermediate result. When both
  // sources name it (`or r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the masked value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** OR decode entry used as a template, with the operation swapped to orh48. */
  def target: CtrlSignals = {
    val cs = getBaseCS(Instructions.OR)
    // replace the fuOpType with orh48
    cs.fuOpType := ALUOpType.orh48
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `or` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "andi_f00_or"
}
425
426// Case: mul 7bit data with 32-bit data
// Source: `andi r1, r0, 127` + `mulw r1, r1, r2`
428// Target: `mulw7 r1, r0, r2`
class FusedMulw7(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
  extends BaseFusionCase(pair, csPair) {
  // The fused instruction is derived from the decoded MULW, so csPair is mandatory.
  require(csPair.isDefined)

  /** First instruction is `andi` with a 12-bit immediate (bits 31:20) of 127. */
  def inst1Cond: Bool = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 127.U
  /** Second instruction is `mulw`. */
  def inst2Cond: Bool = instr(1) === Instructions.MULW

  // Exactly one source of the `mulw` may be the intermediate result. When both
  // sources name it (`mulw r1, r1, r1`), `target` would pass r1 itself as the
  // second operand, and the fused instruction would consume the value r1 held
  // before the pair instead of the masked value. Such pairs are left unfused.
  def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 =/= destToRs2)

  /** Decoded MULW (second instruction) with the operation swapped to mulw7 and sources rewired. */
  def target: CtrlSignals = {
    // use MULW as the base
    val cs = WireInit(csPair.get(1))
    // replace the fuOpType with mulw7
    cs.fuOpType := MDUOpType.mulw7
    cs.lsrc(0) := instr1Rs1
    // second operand: whichever `mulw` source is not the intermediate
    cs.lsrc(1) := Mux(destToRs1, instr2Rs2, instr2Rs1)
    cs
  }

  def fusionName: String = "andi127_mulw"
}
449
/** Looks at every pair of adjacent instructions in the decode group and, when a
  * pair matches one of the known fusion patterns, offers the fused control
  * signals on `out` and marks the second instruction of the pair in `clear`.
  */
class FusionDecoder(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    // raw instruction words in which fusion candidates are searched
    val in = Vec(DecodeWidth, Flipped(ValidIO(UInt(32.W))))
    // control signals for the same slots, handed to the cases that need decoded information
    val dec = Vec(DecodeWidth, Input(new CtrlSignals()))
    // one entry per adjacent pair: valid when that pair is fused
    val out = Vec(DecodeWidth - 1, DecoupledIO(new CtrlSignals))
    // set for an instruction that has been absorbed into the fusion before it
    val clear = Vec(DecodeWidth, Output(Bool()))
  })

  // The first instruction has no predecessor, so nothing can absorb it.
  io.clear.head := false.B

  // Pair slot i with slot i + 1, for both the raw words and the decoded signals.
  val instrPairs = io.in.init.zip(io.in.tail).map { case (first, second) => Seq(first, second) }
  val csPairs = io.dec.init.zip(io.dec.tail).map { case (first, second) => Seq(first, second) }

  for ((((pair, cs), out), i) <- instrPairs.zip(csPairs).zip(io.out).zipWithIndex) {
    val decoded = Some(cs)
    val fusionList = Seq(
      new FusedAdduw(pair),
      new FusedZexth(pair),
      new FusedZexth1(pair),
      new FusedSexth(pair),
      new FusedSh1add(pair),
      new FusedSh2add(pair),
      new FusedSh3add(pair),
      new FusedSzewl1(pair),
      new FusedSzewl2(pair),
      new FusedByte2(pair),
      new FusedSh4add(pair),
      new FusedSr30add(pair),
      new FusedSr31add(pair),
      new FusedSr32add(pair),
      new FusedOddadd(pair),
      new FusedOddaddw(pair),
      new FusedAddwbyte(pair, decoded),
      new FusedAddwbit(pair, decoded),
      new FusedLogiclsb(pair, decoded),
      new FusedOrh48(pair),
      new FusedMulw7(pair, decoded)
    )

    val bothValid = pair.map(_.valid).reduce(_ && _)
    // the first instruction of this pair was already consumed by the previous pair
    val alreadyAbsorbed = io.clear(i)
    val fusionVec = VecInit(fusionList.map(_.isValid))
    val anyMatch = fusionVec.asUInt().orR()
    val eligible = bothValid && !alreadyAbsorbed

    out.valid := eligible && anyMatch
    // the patterns are meant to be mutually exclusive, which Mux1H below relies on
    XSError(PopCount(fusionVec) > 1.U, "more then one fusion matched\n")
    out.bits := Mux1H(fusionVec, fusionList.map(_.target))
    // TODO: assume every instruction fusion clears the second instruction now
    io.clear(i + 1) := out.valid

    // one counter per pattern and pair position
    for ((fusion, matched) <- fusionList.zip(fusionVec)) {
      XSPerfAccumulate(s"case_${fusion.fusionName}_$i", eligible && matched && out.ready)
    }
    // a pattern matched, but its first instruction had already been absorbed
    XSPerfAccumulate(s"conflict_fusion_$i", bothValid && alreadyAbsorbed && anyMatch && out.ready)
  }

  XSPerfAccumulate("fused_instr", PopCount(io.out.map(_.fire)))
}
505