xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 042e89e414f8956a139ecd64336469a6a7b5ff6f)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15*
16*
17* Acknowledgement
18*
19* This implementation is inspired by several key papers:
20* [1] Glenn Reinman, Todd Austin, and Brad Calder. "[A scalable front-end architecture for fast instruction delivery.]
21* (https://doi.org/10.1109/ISCA.1999.765954)" 26th International Symposium on Computer Architecture (ISCA). 1999.
22*
23***************************************************************************************/
24
25package xiangshan.frontend
26
27import chisel3._
28import chisel3.util._
29import org.chipsalliance.cde.config.Parameters
30import utility._
31import utility.ChiselDB
32import utils._
33import xiangshan._
34import xiangshan.backend.CtrlToFtqIO
35import xiangshan.backend.decode.ImmUnion
36import xiangshan.frontend.icache._
37
38class FtqDebugBundle extends Bundle {
39  val pc        = UInt(39.W)
40  val target    = UInt(39.W)
41  val isBr      = Bool()
42  val isJmp     = Bool()
43  val isCall    = Bool()
44  val isRet     = Bool()
45  val misPred   = Bool()
46  val isTaken   = Bool()
47  val predStage = UInt(2.W)
48}
49
50class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
51      entries
52    ) {
53  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
54}
55
56object FtqPtr {
57  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
58    val ptr = Wire(new FtqPtr)
59    ptr.flag  := f
60    ptr.value := v
61    ptr
62  }
63  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr =
64    apply(!ptr.flag, ptr.value)
65}
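// Illustrative usage sketch (not from the original source): FtqPtr is a CircularQueuePtr, so an
// FTQ index carries a wrap flag in addition to the slot value, and ordering helpers such as
// isAfter / isBefore account for wrap-around, e.g.
//   val p = FtqPtr(false.B, 3.U)   // slot 3 in the current wrap
//   val q = FtqPtr.inverse(p)      // same slot, opposite wrap flag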
66
67class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
68
69  val io = IO(new Bundle() {
70    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
71    val ren   = Input(Vec(numRead, Bool()))
72    val rdata = Output(Vec(numRead, gen))
73    val waddr = Input(UInt(log2Up(FtqSize).W))
74    val wen   = Input(Bool())
75    val wdata = Input(gen)
76  })
77
78  for (i <- 0 until numRead) {
79    val sram = Module(new SRAMTemplate(gen, FtqSize, withClockGate = true))
80    sram.io.r.req.valid       := io.ren(i)
81    sram.io.r.req.bits.setIdx := io.raddr(i)
82    io.rdata(i)               := sram.io.r.resp.data(0)
83    sram.io.w.req.valid       := io.wen
84    sram.io.w.req.bits.setIdx := io.waddr
85    sram.io.w.req.bits.data   := VecInit(io.wdata)
86  }
87
88}
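// Usage sketch (illustrative): each read port gets its own copy of the SRAM, all written with the
// same data, so numRead reads plus one write can proceed every cycle at the cost of numRead-fold
// storage, e.g.
//   val meta = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
//   meta.io.ren(0)   := doRead        // hypothetical read enable
//   meta.io.raddr(0) := commPtr.value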
89
90class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
91  val startAddr     = UInt(VAddrBits.W)
92  val nextLineAddr  = UInt(VAddrBits.W)
93  val isNextMask    = Vec(PredictWidth, Bool())
94  val fallThruError = Bool()
95  // val carry = Bool()
96  def getPc(offset: UInt) = {
97    def getHigher(pc: UInt) = pc(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits + 1)
98    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth) + instOffsetBits, instOffsetBits)
99    Cat(
100      getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth) + instOffsetBits), nextLineAddr, startAddr)),
101      getOffset(startAddr) + offset,
102      0.U(instOffsetBits.W)
103    )
104  }
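  // Worked example (illustrative, assuming the default PredictWidth = 16 and instOffsetBits = 1):
  // getPc(offset) rebuilds the PC of slot `offset` as Cat(highBits, lowBits + offset, 0), e.g.
  //   startAddr = 0x80000000, nextLineAddr = 0x80000040  =>  getPc(3.U) = 0x80000006
  // The high bits come from nextLineAddr only when isNextMask(offset) and the carry bit of
  // startAddr are both set, i.e. when the slot wraps past the fetch-block boundary.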
105  def fromBranchPrediction(resp: BranchPredictionBundle) = {
106    def carryPos(addr: UInt) = addr(instOffsetBits + log2Ceil(PredictWidth) + 1)
107    this.startAddr    := resp.pc(3)
108    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
109    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
110      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
111    ))
112    this.fallThruError := resp.fallThruError(3)
113    this
114  }
115  override def toPrintable: Printable =
116    p"startAddr:${Hexadecimal(startAddr)}"
117}
118
119class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
120  val brMask    = Vec(PredictWidth, Bool())
121  val jmpInfo   = ValidUndirectioned(Vec(3, Bool()))
122  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
123  val jalTarget = UInt(VAddrBits.W)
124  val rvcMask   = Vec(PredictWidth, Bool())
125  def hasJal    = jmpInfo.valid && !jmpInfo.bits(0)
126  def hasJalr   = jmpInfo.valid && jmpInfo.bits(0)
127  def hasCall   = jmpInfo.valid && jmpInfo.bits(1)
128  def hasRet    = jmpInfo.valid && jmpInfo.bits(2)
129
130  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
131    val pds = pdWb.pd
132    this.brMask        := VecInit(pds.map(pd => pd.isBr && pd.valid))
133    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
134    this.jmpInfo.bits := ParallelPriorityMux(
135      pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
136      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet))
137    )
138    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
139    this.rvcMask   := VecInit(pds.map(pd => pd.isRVC))
140    this.jalTarget := pdWb.jalTarget
141  }
142
143  def toPd(offset: UInt) = {
144    require(offset.getWidth == log2Ceil(PredictWidth))
145    val pd = Wire(new PreDecodeInfo)
146    pd.valid := true.B
147    pd.isRVC := rvcMask(offset)
148    val isBr   = brMask(offset)
149    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
150    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
151    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
152    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
153    pd
154  }
155}
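// Note on toPd above (the 2-bit encoding is an assumption based on the usual XiangShan BrType
// convention): brType = Cat(isJmp, isJalr || isBr), i.e.
//   00 -> not a CFI, 01 -> conditional branch, 10 -> jal, 11 -> jalr
// so a full PreDecodeInfo for any slot can be recovered from the compact masks stored here.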
156
157class PrefetchPtrDB(implicit p: Parameters) extends Bundle {
158  val fromFtqPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
159  val fromIfuPtr = UInt(log2Up(p(XSCoreParamsKey).FtqSize).W)
160}
161
162class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
163  val sc_disagree = if (!env.FPGAPlatform) Some(Vec(numBr, Bool())) else None
164}
165
166class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
167  val meta      = UInt(MaxMetaLength.W)
168  val ftb_entry = new FTBEntry
169}
170
171class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
172  val target   = UInt(VAddrBits.W)
173  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
174}
175
176class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
177  val valid  = Output(Bool())
178  val ptr    = Output(new FtqPtr)
179  val offset = Output(UInt(log2Ceil(PredictWidth).W))
180  val data   = Input(gen)
181  def apply(valid: Bool, ptr: FtqPtr, offset: UInt) = {
182    this.valid  := valid
183    this.ptr    := ptr
184    this.offset := offset
185    this.data
186  }
187}
188
189class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
190  val redirect       = Valid(new BranchPredictionRedirect)
191  val update         = Valid(new BranchPredictionUpdate)
192  val enq_ptr        = Output(new FtqPtr)
193  val redirctFromIFU = Output(Bool())
194}
195
196class BpuFlushInfo(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
197  // when the IFU pipeline is not stalled,
198  // a packet issued from BPU s3 can have reached IFU stage f1 at most
199  val s2 = Valid(new FtqPtr)
200  val s3 = Valid(new FtqPtr)
201  def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) =
202    src.valid && !isAfter(src.bits, idx_to_flush)
203  def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
204  def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
205}
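// Usage sketch (restating how the FTQ uses this below): a request already issued to IFU must be
// dropped when a later BPU stage redirects to an index at or before it, e.g.
//   val ifu_req_should_be_flushed =
//     flushFromBpu.shouldFlushByStage2(req.ftqIdx) || flushFromBpu.shouldFlushByStage3(req.ftqIdx)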
206
207class FtqToIfuIO(implicit p: Parameters) extends XSBundle {
208  val req              = Decoupled(new FetchRequestBundle)
209  val redirect         = Valid(new BranchPredictionRedirect)
210  val topdown_redirect = Valid(new BranchPredictionRedirect)
211  val flushFromBpu     = new BpuFlushInfo
212}
213
214class FtqToICacheIO(implicit p: Parameters) extends XSBundle {
215  // NOTE: req.bits must be prepared in cycle T,
216  // while req.valid is set true in cycle T + 1
217  val req = Decoupled(new FtqToICacheRequestBundle)
218}
219
220class FtqToPrefetchIO(implicit p: Parameters) extends XSBundle {
221  val req              = Decoupled(new FtqICacheInfo)
222  val flushFromBpu     = new BpuFlushInfo
223  val backendException = UInt(ExceptionType.width.W)
224}
225
226trait HasBackendRedirectInfo extends HasXSParameter {
227  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
228}
229
230class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
231  // write to backend pc mem
232  val pc_mem_wen   = Output(Bool())
233  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
234  val pc_mem_wdata = Output(new Ftq_RF_Components)
235  // newest target
236  val newest_entry_en     = Output(Bool())
237  val newest_entry_target = Output(UInt(VAddrBits.W))
238  val newest_entry_ptr    = Output(new FtqPtr)
239}
240
241class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
242  val io = IO(new Bundle {
243    val start_addr     = Input(UInt(VAddrBits.W))
244    val old_entry      = Input(new FTBEntry)
245    val pd             = Input(new Ftq_pd_Entry)
246    val cfiIndex       = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
247    val target         = Input(UInt(VAddrBits.W))
248    val hit            = Input(Bool())
249    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
250
251    val new_entry         = Output(new FTBEntry)
252    val new_br_insert_pos = Output(Vec(numBr, Bool()))
253    val taken_mask        = Output(Vec(numBr, Bool()))
254    val jmp_taken         = Output(Bool())
255    val mispred_mask      = Output(Vec(numBr + 1, Bool()))
256
257    // for perf counters
258    val is_init_entry           = Output(Bool())
259    val is_old_entry            = Output(Bool())
260    val is_new_br               = Output(Bool())
261    val is_jalr_target_modified = Output(Bool())
262    val is_strong_bias_modified = Output(Bool())
263    val is_br_full              = Output(Bool())
264  })
265
266  // no mispredictions detected at predecode
267  val hit = io.hit
268  val pd  = io.pd
269
270  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
271
272  val cfi_is_br       = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
273  val entry_has_jmp   = pd.jmpInfo.valid
274  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
275  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
276  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
277  val new_jmp_is_ret  = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
278  val last_jmp_rvi    = entry_has_jmp && pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask.last
279  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
280
281  val cfi_is_jal  = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
282  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
283
284  def carryPos = log2Ceil(PredictWidth) + instOffsetBits
285  def getLower(pc: UInt) = pc(carryPos - 1, instOffsetBits)
286  // if not hit, establish a new entry
287  init_entry.valid := true.B
288  // tag is left for ftb to assign
289
290  // case br
291  val init_br_slot = init_entry.getSlotForBr(0)
292  when(cfi_is_br) {
293    init_br_slot.valid  := true.B
294    init_br_slot.offset := io.cfiIndex.bits
295    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
296    init_entry.strong_bias(0) := true.B // set to strong bias on init
297  }
298
299  // case jmp
300  when(entry_has_jmp) {
301    init_entry.tailSlot.offset := pd.jmpOffset
302    init_entry.tailSlot.valid  := new_jmp_is_jal || new_jmp_is_jalr
303    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare = false)
304    init_entry.strong_bias(numBr - 1) := new_jmp_is_jalr // set strong bias for the jalr on init
305  }
306
307  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
308  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
309  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos - instOffsetBits), true.B)
310
311  require(
312    isPow2(PredictWidth),
313    "If PredictWidth does not satisfy the power of 2," +
314      "pftAddr := getLower(io.start_addr) and carry := true.B  not working!!"
315  )
316
317  init_entry.isJalr := new_jmp_is_jalr
318  init_entry.isCall := new_jmp_is_call
319  init_entry.isRet  := new_jmp_is_ret
320  // i.e. the fall-through address may point to the middle of an RVI instruction
321  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth - 1).U && !pd.rvcMask(pd.jmpOffset)
322
323  // if hit, check whether a new cfi(only br is possible) is detected
324  val oe              = io.old_entry
325  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
326  val br_recorded     = br_recorded_vec.asUInt.orR
327  val is_new_br       = cfi_is_br && !br_recorded
328  val new_br_offset   = io.cfiIndex.bits
329  // vec(i) means new br will be inserted BEFORE old br(i)
330  val allBrSlotsVec = oe.allSlotsForBr
331  val new_br_insert_onehot = VecInit((0 until numBr).map {
332    i =>
333      i match {
334        case 0 =>
335          !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
336        case idx =>
337          allBrSlotsVec(idx - 1).valid && new_br_offset > allBrSlotsVec(idx - 1).offset &&
338          (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
339      }
340  })
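  // Illustrative example (assuming the common numBr = 2 configuration): if the old entry records
  // branches at offsets {2, 9} and a new branch is found at offset 5, only bit 1 of
  // new_br_insert_onehot is set: slot 0 keeps offset 2 (only its strong_bias is cleared), slot 1
  // is overwritten with offset 5 below, and the displaced branch at offset 9 falls out of the
  // entry (pftAddr is then shortened to its address, see pft_need_to_change).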
341
342  val old_entry_modified = WireInit(io.old_entry)
343  for (i <- 0 until numBr) {
344    val slot = old_entry_modified.allSlotsForBr(i)
345    when(new_br_insert_onehot(i)) {
346      slot.valid  := true.B
347      slot.offset := new_br_offset
348      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr - 1)
349      old_entry_modified.strong_bias(i) := true.B
350    }.elsewhen(new_br_offset > oe.allSlotsForBr(i).offset) {
351      old_entry_modified.strong_bias(i) := false.B
352      // all other fields remain unchanged
353    }.otherwise {
354      // case i == 0, remain unchanged
355      if (i != 0) {
356        val noNeedToMoveFromFormerSlot = (i == numBr - 1).B && !oe.brSlots.last.valid
357        when(!noNeedToMoveFromFormerSlot) {
358          slot.fromAnotherSlot(oe.allSlotsForBr(i - 1))
359          old_entry_modified.strong_bias(i) := oe.strong_bias(i)
360        }
361      }
362    }
363  }
364
365  // two circumstances:
366  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
367  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
368  //        the previous last br or the new br
369  val may_have_to_replace = oe.noEmptySlotForNewBr
370  val pft_need_to_change  = is_new_br && may_have_to_replace
371  // it should either be the given last br or the new br
372  when(pft_need_to_change) {
373    val new_pft_offset =
374      Mux(!new_br_insert_onehot.asUInt.orR, new_br_offset, oe.allSlotsForBr.last.offset)
375
376    // set jmp to invalid
377    old_entry_modified.pftAddr              := getLower(io.start_addr) + new_pft_offset
378    old_entry_modified.carry                := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
379    old_entry_modified.last_may_be_rvi_call := false.B
380    old_entry_modified.isCall               := false.B
381    old_entry_modified.isRet                := false.B
382    old_entry_modified.isJalr               := false.B
383  }
384
385  val old_entry_jmp_target_modified = WireInit(oe)
386  val old_target      = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
387  val old_tail_is_jmp = !oe.tailSlot.sharing
388  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
389  when(jalr_target_modified) {
390    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
391    old_entry_jmp_target_modified.strong_bias := 0.U.asTypeOf(Vec(numBr, Bool()))
392  }
393
394  val old_entry_strong_bias    = WireInit(oe)
395  val strong_bias_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
396  for (i <- 0 until numBr) {
397    when(br_recorded_vec(0)) {
398      old_entry_strong_bias.strong_bias(0) :=
399        oe.strong_bias(0) && io.cfiIndex.valid && oe.brValids(0) && io.cfiIndex.bits === oe.brOffset(0)
400    }.elsewhen(br_recorded_vec(numBr - 1)) {
401      old_entry_strong_bias.strong_bias(0) := false.B
402      old_entry_strong_bias.strong_bias(numBr - 1) :=
403        oe.strong_bias(numBr - 1) && io.cfiIndex.valid && oe.brValids(numBr - 1) && io.cfiIndex.bits === oe.brOffset(
404          numBr - 1
405        )
406    }
407    strong_bias_modified_vec(i) := oe.strong_bias(i) && oe.brValids(i) && !old_entry_strong_bias.strong_bias(i)
408  }
409  val strong_bias_modified = strong_bias_modified_vec.reduce(_ || _)
410
411  val derived_from_old_entry =
412    Mux(is_new_br, old_entry_modified, Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias))
413
414  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
415
416  io.new_br_insert_pos := new_br_insert_onehot
417  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map {
418    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
419  })
420  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
421  for (i <- 0 until numBr) {
422    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
423  }
424  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
425
426  // for perf counters
427  io.is_init_entry           := !hit
428  io.is_old_entry            := hit && !is_new_br && !jalr_target_modified && !strong_bias_modified
429  io.is_new_br               := hit && is_new_br
430  io.is_jalr_target_modified := hit && jalr_target_modified
431  io.is_strong_bias_modified := hit && strong_bias_modified
432  io.is_br_full              := hit && is_new_br && may_have_to_replace
433}
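// Selection summary (restating the muxes above): the generated entry is
//   Mux(!hit, init_entry,
//     Mux(is_new_br, old_entry_modified,
//       Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_strong_bias)))
// i.e. a miss builds a fresh entry, while a hit patches the old one in priority order:
// newly discovered branch > corrected jalr target > weakened strong_bias bits.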
434
435class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
436  val io = IO(new Bundle {
437    val ifuPtr_w           = Input(new FtqPtr)
438    val ifuPtrPlus1_w      = Input(new FtqPtr)
439    val ifuPtrPlus2_w      = Input(new FtqPtr)
440    val pfPtr_w            = Input(new FtqPtr)
441    val pfPtrPlus1_w       = Input(new FtqPtr)
442    val commPtr_w          = Input(new FtqPtr)
443    val commPtrPlus1_w     = Input(new FtqPtr)
444    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
445    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
446    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
447    val pfPtr_rdata        = Output(new Ftq_RF_Components)
448    val pfPtrPlus1_rdata   = Output(new Ftq_RF_Components)
449    val commPtr_rdata      = Output(new Ftq_RF_Components)
450    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
451
452    val wen   = Input(Bool())
453    val waddr = Input(UInt(log2Ceil(FtqSize).W))
454    val wdata = Input(new Ftq_RF_Components)
455  })
456
457  val num_pc_read = numOtherReads + 5
458  val mem         = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, num_pc_read, 1, "FtqPC"))
459  mem.io.wen(0)   := io.wen
460  mem.io.waddr(0) := io.waddr
461  mem.io.wdata(0) := io.wdata
462
463  // read one cycle ahead for ftq local reads
464  val raddr_vec = VecInit(Seq(
465    io.ifuPtr_w.value,
466    io.ifuPtrPlus1_w.value,
467    io.ifuPtrPlus2_w.value,
468    io.pfPtr_w.value,
469    io.pfPtrPlus1_w.value,
470    io.commPtrPlus1_w.value,
471    io.commPtr_w.value
472  ))
473
474  mem.io.raddr := raddr_vec
475
476  io.ifuPtr_rdata       := mem.io.rdata.dropRight(6).last
477  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(5).last
478  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(4).last
479  io.pfPtr_rdata        := mem.io.rdata.dropRight(3).last
480  io.pfPtrPlus1_rdata   := mem.io.rdata.dropRight(2).last
481  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
482  io.commPtr_rdata      := mem.io.rdata.last
483}
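// Read-port layout sketch (derived from raddr_vec above, with numOtherReads = 2 as instantiated
// below): the seven read ports are, in order,
//   ifuPtr, ifuPtrPlus1, ifuPtrPlus2, pfPtr, pfPtrPlus1, commPtrPlus1, commPtr
// which is why the rdata outputs are peeled off with dropRight(6) ... dropRight(1) and last.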
484
485class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
486    with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
487    with HasICacheParameters {
488  val io = IO(new Bundle {
489    val fromBpu     = Flipped(new BpuToFtqIO)
490    val fromIfu     = Flipped(new IfuToFtqIO)
491    val fromBackend = Flipped(new CtrlToFtqIO)
492
493    val toBpu       = new FtqToBpuIO
494    val toIfu       = new FtqToIfuIO
495    val toICache    = new FtqToICacheIO
496    val toBackend   = new FtqToCtrlIO
497    val toPrefetch  = new FtqToPrefetchIO
498    val icacheFlush = Output(Bool())
499
500    val bpuInfo = new Bundle {
501      val bpRight = Output(UInt(XLEN.W))
502      val bpWrong = Output(UInt(XLEN.W))
503    }
504
505    val mmioCommitRead = Flipped(new mmioCommitRead)
506
507    // for perf
508    val ControlBTBMissBubble = Output(Bool())
509    val TAGEMissBubble       = Output(Bool())
510    val SCMissBubble         = Output(Bool())
511    val ITTAGEMissBubble     = Output(Bool())
512    val RASMissBubble        = Output(Bool())
513  })
514  io.bpuInfo := DontCare
515
516  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
517  // only driven by clock, not valid-ready
518  topdown_stage                  := io.fromBpu.resp.bits.topdown_info
519  io.toIfu.req.bits.topdown_info := topdown_stage
520
521  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
522
523  // io.fromBackend.ftqIdxAhead: bju(BjuCnt) + ldReplay + exception
524  val ftqIdxAhead = VecInit(Seq.tabulate(FtqRedirectAheadNum)(i => io.fromBackend.ftqIdxAhead(i))) // only bju
525  val ftqIdxSelOH = io.fromBackend.ftqIdxSelOH.bits(FtqRedirectAheadNum - 1, 0)
526
527  val aheadValid         = ftqIdxAhead.map(_.valid).reduce(_ | _) && !io.fromBackend.redirect.valid
528  val realAhdValid       = io.fromBackend.redirect.valid && (ftqIdxSelOH > 0.U) && RegNext(aheadValid)
529  val backendRedirect    = Wire(Valid(new BranchPredictionRedirect))
530  val backendRedirectReg = Wire(Valid(new BranchPredictionRedirect))
531  backendRedirectReg.valid := RegNext(Mux(realAhdValid, false.B, backendRedirect.valid))
532  backendRedirectReg.bits  := RegEnable(backendRedirect.bits, backendRedirect.valid)
533  val fromBackendRedirect = Wire(Valid(new BranchPredictionRedirect))
534  fromBackendRedirect := Mux(realAhdValid, backendRedirect, backendRedirectReg)
535
536  val stage2Flush  = backendRedirect.valid
537  val backendFlush = stage2Flush || RegNext(stage2Flush)
538  val ifuFlush     = Wire(Bool())
539
540  val flush = stage2Flush || RegNext(stage2Flush)
541
542  val allowBpuIn, allowToIfu = WireInit(false.B)
543  val flushToIfu             = !allowToIfu
544  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
545  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
546
547  def copyNum                                              = 5
548  val bpuPtr, ifuPtr, pfPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
549  val ifuPtrPlus1                                          = RegInit(FtqPtr(false.B, 1.U))
550  val ifuPtrPlus2                                          = RegInit(FtqPtr(false.B, 2.U))
551  val pfPtrPlus1                                           = RegInit(FtqPtr(false.B, 1.U))
552  val commPtrPlus1                                         = RegInit(FtqPtr(false.B, 1.U))
553  val copied_ifu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
554  val copied_bpu_ptr                                       = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
555  require(FtqSize >= 4)
556  val ifuPtr_write       = WireInit(ifuPtr)
557  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
558  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
559  val pfPtr_write        = WireInit(pfPtr)
560  val pfPtrPlus1_write   = WireInit(pfPtrPlus1)
561  val ifuWbPtr_write     = WireInit(ifuWbPtr)
562  val commPtr_write      = WireInit(commPtr)
563  val commPtrPlus1_write = WireInit(commPtrPlus1)
564  val robCommPtr_write   = WireInit(robCommPtr)
565  ifuPtr       := ifuPtr_write
566  ifuPtrPlus1  := ifuPtrPlus1_write
567  ifuPtrPlus2  := ifuPtrPlus2_write
568  pfPtr        := pfPtr_write
569  pfPtrPlus1   := pfPtrPlus1_write
570  ifuWbPtr     := ifuWbPtr_write
571  commPtr      := commPtr_write
572  commPtrPlus1 := commPtrPlus1_write
573  copied_ifu_ptr.map { ptr =>
574    ptr := ifuPtr_write
575    dontTouch(ptr)
576  }
577  robCommPtr := robCommPtr_write
578  val validEntries = distanceBetween(bpuPtr, commPtr)
579  val canCommit    = Wire(Bool())
580
581  // Instruction page faults and instruction access faults are sent from the backend with redirect requests.
582  // When an IPF or IAF is sent, backendPcFaultPtr points to the FTQ entry whose first instruction
583  // raises the IPF or IAF, which is the entry at ifuWbPtr_write.
584  // backendException is cleared only after IFU has written back that FTQ entry, which
585  // makes sure that IAF and IPF are correctly raised instead of being flushed by redirect requests.
586  val backendException  = RegInit(ExceptionType.none)
587  val backendPcFaultPtr = RegInit(FtqPtr(false.B, 0.U))
588  when(fromBackendRedirect.valid) {
589    backendException := ExceptionType.fromOH(
590      has_pf = fromBackendRedirect.bits.cfiUpdate.backendIPF,
591      has_gpf = fromBackendRedirect.bits.cfiUpdate.backendIGPF,
592      has_af = fromBackendRedirect.bits.cfiUpdate.backendIAF
593    )
594    when(
595      fromBackendRedirect.bits.cfiUpdate.backendIPF || fromBackendRedirect.bits.cfiUpdate.backendIGPF ||
596        fromBackendRedirect.bits.cfiUpdate.backendIAF
597    ) {
598      backendPcFaultPtr := ifuWbPtr_write
599    }
600  }.elsewhen(ifuWbPtr =/= backendPcFaultPtr) {
601    backendException := ExceptionType.none
602  }
603
604  // **********************************************************************
605  // **************************** enq from bpu ****************************
606  // **********************************************************************
607  val new_entry_ready = validEntries < FtqSize.U || canCommit
608  io.fromBpu.resp.ready := new_entry_ready
609
610  val bpu_s2_resp     = io.fromBpu.resp.bits.s2
611  val bpu_s3_resp     = io.fromBpu.resp.bits.s3
612  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
613  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
614
615  io.toBpu.enq_ptr := bpuPtr
616  val enq_fire    = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
617  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
618
619  val bpu_in_resp     = io.fromBpu.resp.bits.selectedResp
620  val bpu_in_stage    = io.fromBpu.resp.bits.selectedRespIdxForFtq
621  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
622  val bpu_in_resp_idx = bpu_in_resp_ptr.value
623
624  // read ports:      pfReq1 + pfReq2 + ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
625  val ftq_pc_mem = Module(new FtqPcMemWrapper(2))
626  // resp from uBTB
627  ftq_pc_mem.io.wen   := bpu_in_fire
628  ftq_pc_mem.io.waddr := bpu_in_resp_idx
629  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
630
631  //                                                            ifuRedirect + backendRedirect + commit
632  val ftq_redirect_mem = Module(new SyncDataModuleTemplate(
633    new Ftq_Redirect_SRAMEntry,
634    FtqSize,
635    IfuRedirectNum + FtqRedirectAheadNum + 1,
636    1,
637    hasRen = true
638  ))
639  // this info is intended to be enqueued at the last stage of bpu
640  ftq_redirect_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
641  ftq_redirect_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
642  ftq_redirect_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_spec_info
643  println(f"ftq redirect MEM: entry ${ftq_redirect_mem.io.wdata(0).getWidth} * ${FtqSize} * 3")
644
645  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
646  // this info is intended to be enqueued at the last stage of bpu
647  ftq_meta_1r_sram.io.wen             := io.fromBpu.resp.bits.lastStage.valid(3)
648  ftq_meta_1r_sram.io.waddr           := io.fromBpu.resp.bits.lastStage.ftq_idx.value
649  ftq_meta_1r_sram.io.wdata.meta      := io.fromBpu.resp.bits.last_stage_meta
650  ftq_meta_1r_sram.io.wdata.ftb_entry := io.fromBpu.resp.bits.last_stage_ftb_entry
651  //                                                            ifuRedirect + backendRedirect (commit moved to ftq_meta_1r_sram)
652  val ftb_entry_mem = Module(new SyncDataModuleTemplate(
653    new FTBEntry_FtqMem,
654    FtqSize,
655    IfuRedirectNum + FtqRedirectAheadNum,
656    1,
657    hasRen = true
658  ))
659  ftb_entry_mem.io.wen(0)   := io.fromBpu.resp.bits.lastStage.valid(3)
660  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
661  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
662
663  // multi-write
664  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
665  val newest_entry_target          = Reg(UInt(VAddrBits.W))
666  val newest_entry_target_modified = RegInit(false.B)
667  val newest_entry_ptr             = Reg(new FtqPtr)
668  val newest_entry_ptr_modified    = RegInit(false.B)
669  val cfiIndex_vec                 = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
670  val mispredict_vec               = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
671  val pred_stage                   = Reg(Vec(FtqSize, UInt(2.W)))
672  val pred_s1_cycle                = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
673
674  val c_empty :: c_toCommit :: c_committed :: c_flushed :: Nil = Enum(4)
675  val commitStateQueueReg = RegInit(VecInit(Seq.fill(FtqSize) {
676    VecInit(Seq.fill(PredictWidth)(c_empty))
677  }))
678  val commitStateQueueEnable = WireInit(VecInit(Seq.fill(FtqSize)(false.B)))
679  val commitStateQueueNext   = WireInit(commitStateQueueReg)
680
681  for (f <- 0 until FtqSize) {
682    when(commitStateQueueEnable(f)) {
683      commitStateQueueReg(f) := commitStateQueueNext(f)
684    }
685  }
686
687  val f_to_send :: f_sent :: Nil = Enum(2)
688  val entry_fetch_status         = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
689
690  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
691  val entry_hit_status                         = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
692
693  // modify registers one cycle later to cut critical path
694  val last_cycle_bpu_in       = RegNext(bpu_in_fire)
695  val last_cycle_bpu_in_ptr   = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
696  val last_cycle_bpu_in_idx   = last_cycle_bpu_in_ptr.value
697  val last_cycle_bpu_target   = RegEnable(bpu_in_resp.getTarget(3), bpu_in_fire)
698  val last_cycle_cfiIndex     = RegEnable(bpu_in_resp.cfiIndex(3), bpu_in_fire)
699  val last_cycle_bpu_in_stage = RegEnable(bpu_in_stage, bpu_in_fire)
700
701  def extra_copyNum_for_commitStateQueue = 2
702  val copied_last_cycle_bpu_in =
703    VecInit(Seq.fill(copyNum + extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
704  val copied_last_cycle_bpu_in_ptr_for_ftq =
705    VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
706
707  newest_entry_target_modified := false.B
708  newest_entry_ptr_modified    := false.B
709  when(last_cycle_bpu_in) {
710    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
711    cfiIndex_vec(last_cycle_bpu_in_idx)       := last_cycle_cfiIndex
712    pred_stage(last_cycle_bpu_in_idx)         := last_cycle_bpu_in_stage
713
714    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
715    newest_entry_target_modified         := true.B
716    newest_entry_target                  := last_cycle_bpu_target
717    newest_entry_ptr_modified            := true.B
718    newest_entry_ptr                     := last_cycle_bpu_in_ptr
719  }
720
721  // reduce fanout by delaying the write by one cycle
722  when(RegNext(last_cycle_bpu_in)) {
723    mispredict_vec(RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)) :=
724      WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
725  }
726
727  // record s1 pred cycles
728  pred_s1_cycle.map { vec =>
729    when(bpu_in_fire && (bpu_in_stage === BP_S1)) {
730      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
731    }
732  }
733
734  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
735  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
736  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
737    case ((in, ptr), i) =>
738      when(in) {
739        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
740        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
741        for (j <- 0 until perSetEntries) {
742          when(ptr.value === (i * perSetEntries + j).U) {
743            commitStateQueueNext(i * perSetEntries + j) := VecInit(Seq.fill(PredictWidth)(c_empty))
744            // Clock gating optimization, use 1 gate cell to control a row
745            commitStateQueueEnable(i * perSetEntries + j) := true.B
746          }
747        }
748      }
749  }
750
751  bpuPtr := bpuPtr + enq_fire
752  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
753  when(io.toIfu.req.fire && allowToIfu) {
754    ifuPtr_write      := ifuPtrPlus1
755    ifuPtrPlus1_write := ifuPtrPlus2
756    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
757  }
758  when(io.toPrefetch.req.fire && allowToIfu) {
759    pfPtr_write      := pfPtrPlus1
760    pfPtrPlus1_write := pfPtrPlus1 + 1.U
761  }
762
763  // only use ftb result to assign hit status
764  when(bpu_s2_resp.valid(3)) {
765    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
766  }
767
768  io.toIfu.flushFromBpu.s2.valid      := bpu_s2_redirect
769  io.toIfu.flushFromBpu.s2.bits       := bpu_s2_resp.ftq_idx
770  io.toPrefetch.flushFromBpu.s2.valid := bpu_s2_redirect
771  io.toPrefetch.flushFromBpu.s2.bits  := bpu_s2_resp.ftq_idx
772  when(bpu_s2_redirect) {
773    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
774    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
775    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
776    when(!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
777      ifuPtr_write      := bpu_s2_resp.ftq_idx
778      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
779      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
780    }
781    when(!isBefore(pfPtr, bpu_s2_resp.ftq_idx)) {
782      pfPtr_write      := bpu_s2_resp.ftq_idx
783      pfPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
784    }
785  }
786
787  io.toIfu.flushFromBpu.s3.valid      := bpu_s3_redirect
788  io.toIfu.flushFromBpu.s3.bits       := bpu_s3_resp.ftq_idx
789  io.toPrefetch.flushFromBpu.s3.valid := bpu_s3_redirect
790  io.toPrefetch.flushFromBpu.s3.bits  := bpu_s3_resp.ftq_idx
791  when(bpu_s3_redirect) {
792    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
793    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
794    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
795    when(!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
796      ifuPtr_write      := bpu_s3_resp.ftq_idx
797      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
798      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
799    }
800    when(!isBefore(pfPtr, bpu_s3_resp.ftq_idx)) {
801      pfPtr_write      := bpu_s3_resp.ftq_idx
802      pfPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
803    }
804  }
805
806  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
807  XSError(isBefore(bpuPtr, pfPtr) && !isFull(bpuPtr, pfPtr), "\npfPtr is before bpuPtr!\n")
808  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
809
810  (0 until copyNum).map(i => XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n"))
811
812  // ****************************************************************
813  // **************************** to ifu ****************************
814  // ****************************************************************
815  // 0  for ifu, and 1-4 for ICache
816  val bpu_in_bypass_buf         = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
817  val copied_bpu_in_bypass_buf  = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
818  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
819  val bpu_in_bypass_ptr         = RegEnable(bpu_in_resp_ptr, bpu_in_fire)
820  val last_cycle_to_ifu_fire    = RegNext(io.toIfu.req.fire)
821  val last_cycle_to_pf_fire     = RegNext(io.toPrefetch.req.fire)
822
823  val copied_bpu_in_bypass_ptr      = VecInit(Seq.fill(copyNum)(RegEnable(bpu_in_resp_ptr, bpu_in_fire)))
824  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
825
826  // read pc and target
827  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
828  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
829  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
830  ftq_pc_mem.io.pfPtr_w        := pfPtr_write
831  ftq_pc_mem.io.pfPtrPlus1_w   := pfPtrPlus1_write
832  ftq_pc_mem.io.commPtr_w      := commPtr_write
833  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
834
835  io.toIfu.req.bits.ftqIdx := ifuPtr
836
837  val toICachePcBundle               = Wire(Vec(copyNum, new Ftq_RF_Components))
838  val toICacheEntryToSend            = Wire(Vec(copyNum, Bool()))
839  val nextCycleToPrefetchPcBundle    = Wire(new Ftq_RF_Components)
840  val nextCycleToPrefetchEntryToSend = Wire(Bool())
841  val toPrefetchPcBundle             = RegNext(nextCycleToPrefetchPcBundle)
842  val toPrefetchEntryToSend          = RegNext(nextCycleToPrefetchEntryToSend)
843  val toIfuPcBundle                  = Wire(new Ftq_RF_Components)
844  val entry_is_to_send               = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
845  val entry_ftq_offset               = WireInit(cfiIndex_vec(ifuPtr.value))
846  val entry_next_addr                = Wire(UInt(VAddrBits.W))
847
848  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
849  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
850  val diff_entry_next_addr   = WireInit(update_target(ifuPtr.value)) // TODO: remove this
851
852  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(
853    entry_fetch_status(ifuPtrPlus1.value) === f_to_send
854  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1)))
855  val copied_ifu_ptr_to_send = VecInit(Seq.fill(copyNum)(RegNext(
856    entry_fetch_status(ifuPtr.value) === f_to_send
857  ) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
858
859  for (i <- 0 until copyNum) {
860    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)) {
861      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
862      toICacheEntryToSend(i) := true.B
863    }.elsewhen(copied_last_cycle_to_ifu_fire(i)) {
864      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
865      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
866    }.otherwise {
867      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
868      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
869    }
870  }
871
872  // Calculate requests sent to prefetcher one cycle in advance to cut critical path
873  when(bpu_in_fire && bpu_in_resp_ptr === pfPtr_write) {
874    nextCycleToPrefetchPcBundle    := ftq_pc_mem.io.wdata
875    nextCycleToPrefetchEntryToSend := true.B
876  }.elsewhen(io.toPrefetch.req.fire) {
877    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtrPlus1_rdata
878    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtrPlus1.value) === f_to_send ||
879      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtrPlus1
880  }.otherwise {
881    nextCycleToPrefetchPcBundle := ftq_pc_mem.io.pfPtr_rdata
882    nextCycleToPrefetchEntryToSend := entry_fetch_status(pfPtr.value) === f_to_send ||
883      last_cycle_bpu_in && bpu_in_bypass_ptr === pfPtr // reduce potential bubbles
884  }
885
886  // TODO: reconsider target address bypass logic
887  when(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
888    toIfuPcBundle        := bpu_in_bypass_buf_for_ifu
889    entry_is_to_send     := true.B
890    entry_next_addr      := last_cycle_bpu_target
891    entry_ftq_offset     := last_cycle_cfiIndex
892    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
893  }.elsewhen(last_cycle_to_ifu_fire) {
894    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
895    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
896      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1) // reduce potential bubbles
897    entry_next_addr := Mux(
898      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
899      bpu_in_bypass_buf_for_ifu.startAddr,
900      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))
901    ) // ifuPtr+2
902  }.otherwise {
903    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
904    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
905      RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
906    entry_next_addr := Mux(
907      last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtrPlus1,
908      bpu_in_bypass_buf_for_ifu.startAddr,
909      Mux(ifuPtr === newest_entry_ptr, newest_entry_target, RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))
910    ) // ifuPtr+1
911  }
912
913  io.toIfu.req.valid              := entry_is_to_send && ifuPtr =/= bpuPtr
914  io.toIfu.req.bits.nextStartAddr := entry_next_addr
915  io.toIfu.req.bits.ftqOffset     := entry_ftq_offset
916  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
917
918  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
919  io.toICache.req.bits.readValid.zipWithIndex.map { case (copy, i) =>
920    copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)
921  }
922  io.toICache.req.bits.pcMemRead.zipWithIndex.foreach { case (copy, i) =>
923    copy.fromFtqPcBundle(toICachePcBundle(i))
924    copy.ftqIdx := ifuPtr
925  }
926  io.toICache.req.bits.backendException := ExceptionType.hasException(backendException) && backendPcFaultPtr === ifuPtr
927
928  io.toPrefetch.req.valid := toPrefetchEntryToSend && pfPtr =/= bpuPtr
929  io.toPrefetch.req.bits.fromFtqPcBundle(toPrefetchPcBundle)
930  io.toPrefetch.req.bits.ftqIdx  := pfPtr
931  io.toPrefetch.backendException := Mux(backendPcFaultPtr === pfPtr, backendException, ExceptionType.none)
932  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
933  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
934  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
935  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
936  // }
937
938  // TODO: remove this
939  XSError(
940    io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
941    p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n"
942  )
943
944  // when the fall-through address is smaller than the start address, there must be a false hit
945  when(toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
946    when(io.toIfu.req.fire &&
947      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
948      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)) {
949      entry_hit_status(ifuPtr.value) := h_false_hit
950      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
951    }
952    XSDebug(
953      true.B,
954      "fallThruError! start:%x, fallThru:%x\n",
955      io.toIfu.req.bits.startAddr,
956      io.toIfu.req.bits.nextStartAddr
957    )
958  }
959
960  XSPerfAccumulate(
961    f"fall_through_error_to_ifu",
962    toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
963      io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
964  )
965
966  val ifu_req_should_be_flushed =
967    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
968      io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
969
970  when(io.toIfu.req.fire && !ifu_req_should_be_flushed) {
971    entry_fetch_status(ifuPtr.value) := f_sent
972  }
973
974  // *********************************************************************
975  // **************************** wb from ifu ****************************
976  // *********************************************************************
977  val pdWb         = io.fromIfu.pdWb
978  val pds          = pdWb.bits.pd
979  val ifu_wb_valid = pdWb.valid
980  val ifu_wb_idx   = pdWb.bits.ftqIdx.value
981  // read ports:                                                         commit update
982  val ftq_pd_mem =
983    Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, FtqRedirectAheadNum + 1, 1, hasRen = true))
984  ftq_pd_mem.io.wen(0)   := ifu_wb_valid
985  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
986  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
987
988  val hit_pd_valid       = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
989  val hit_pd_mispred     = hit_pd_valid && pdWb.bits.misOffset.valid
990  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init = false.B)
991  val pd_reg             = RegEnable(pds, pdWb.valid)
992  val start_pc_reg       = RegEnable(pdWb.bits.pc(0), pdWb.valid)
993  val wb_idx_reg         = RegEnable(ifu_wb_idx, pdWb.valid)
994
995  when(ifu_wb_valid) {
996    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map {
997      case (v, inRange) => v && inRange
998    })
999    commitStateQueueEnable(ifu_wb_idx) := true.B
1000    (commitStateQueueNext(ifu_wb_idx) zip comm_stq_wen).map {
1001      case (qe, v) => when(v) {
1002          qe := c_toCommit
1003        }
1004    }
1005  }
1006
1007  when(ifu_wb_valid) {
1008    ifuWbPtr_write := ifuWbPtr + 1.U
1009  }
1010
1011  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
1012
1013  ftb_entry_mem.io.ren.get.head := ifu_wb_valid
1014  ftb_entry_mem.io.raddr.head   := ifu_wb_idx
1015  val has_false_hit = WireInit(false.B)
1016  when(RegNext(hit_pd_valid)) {
1017    // check for false hit
1018    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
1019    val brSlots        = pred_ftb_entry.brSlots
1020    val tailSlot       = pred_ftb_entry.tailSlot
1021    // we check cfis that bpu predicted
1022
1023    // bpu predicted branches but denied by predecode
1024    val br_false_hit =
1025      brSlots.map {
1026        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
1027      }.reduce(_ || _) ||
1028        (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
1029          !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
1030
1031    val jmpOffset = tailSlot.offset
1032    val jmp_pd    = pd_reg(jmpOffset)
1033    val jal_false_hit = pred_ftb_entry.jmpValid &&
1034      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
1035        (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
1036        (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
1037        (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)))
1038
1039    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
1040    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
1041
1042    // assert(!has_false_hit)
1043  }
1044
1045  when(has_false_hit) {
1046    entry_hit_status(wb_idx_reg) := h_false_hit
1047  }
1048
1049  // *******************************************************************************
1050  // **************************** redirect from backend ****************************
1051  // *******************************************************************************
1052
1053  // redirect read cfiInfo, couples to redirectGen s2
1054  // ftqIdxAhead(0-3) => ftq_redirect_mem(1-4), reuse ftq_redirect_mem(1)
1055  val ftq_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_Redirect_SRAMEntry))
1056  val ftb_redirect_rdata = Wire(Vec(FtqRedirectAheadNum, new FTBEntry_FtqMem))
1057
1058  val ftq_pd_rdata = Wire(Vec(FtqRedirectAheadNum, new Ftq_pd_Entry))
1059  for (i <- 1 until FtqRedirectAheadNum) {
1060    ftq_redirect_mem.io.ren.get(i + IfuRedirectNum) := ftqIdxAhead(i).valid
1061    ftq_redirect_mem.io.raddr(i + IfuRedirectNum)   := ftqIdxAhead(i).bits.value
1062    ftb_entry_mem.io.ren.get(i + IfuRedirectNum)    := ftqIdxAhead(i).valid
1063    ftb_entry_mem.io.raddr(i + IfuRedirectNum)      := ftqIdxAhead(i).bits.value
1064
1065    ftq_pd_mem.io.ren.get(i) := ftqIdxAhead(i).valid
1066    ftq_pd_mem.io.raddr(i)   := ftqIdxAhead(i).bits.value
1067  }
1068  ftq_redirect_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1069  ftq_redirect_mem.io.raddr(IfuRedirectNum) := Mux(
1070    aheadValid,
1071    ftqIdxAhead(0).bits.value,
1072    backendRedirect.bits.ftqIdx.value
1073  )
1074  ftb_entry_mem.io.ren.get(IfuRedirectNum) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1075  ftb_entry_mem.io.raddr(IfuRedirectNum) := Mux(
1076    aheadValid,
1077    ftqIdxAhead(0).bits.value,
1078    backendRedirect.bits.ftqIdx.value
1079  )
1080
1081  ftq_pd_mem.io.ren.get(0) := Mux(aheadValid, ftqIdxAhead(0).valid, backendRedirect.valid)
1082  ftq_pd_mem.io.raddr(0)   := Mux(aheadValid, ftqIdxAhead(0).bits.value, backendRedirect.bits.ftqIdx.value)
1083
1084  for (i <- 0 until FtqRedirectAheadNum) {
1085    ftq_redirect_rdata(i) := ftq_redirect_mem.io.rdata(i + IfuRedirectNum)
1086    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + IfuRedirectNum)
1087
1088    ftq_pd_rdata(i) := ftq_pd_mem.io.rdata(i)
1089  }
1090  val stage3CfiInfo =
1091    Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_redirect_rdata), ftq_redirect_mem.io.rdata(IfuRedirectNum))
1092  val stage3PdInfo       = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftq_pd_rdata), ftq_pd_mem.io.rdata(0))
1093  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
1094  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
1095  backendRedirectCfi.pd := stage3PdInfo.toPd(fromBackendRedirect.bits.ftqOffset)
1096
1097  val r_ftb_entry = Mux(realAhdValid, Mux1H(ftqIdxSelOH, ftb_redirect_rdata), ftb_entry_mem.io.rdata(IfuRedirectNum))
1098  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
1099
1100  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
1101  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
1102  // FIXME: not portable
1103  val sc_disagree = stage3CfiInfo.sc_disagree.getOrElse(VecInit(Seq.fill(numBr)(false.B)))
1104  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(
1105    r_ftb_entry.brSlots(0).offset === r_ftqOffset,
1106    sc_disagree(0),
1107    sc_disagree(1)
1108  )
1109
1110  when(entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
1111    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
1112      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
1113        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1114
1115    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
1116      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
1117  }.otherwise {
1118    backendRedirectCfi.shift       := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
1119    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
1120  }
1121
1122  // ***************************************************************************
1123  // **************************** redirect from ifu ****************************
1124  // ***************************************************************************
1125  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
1126  fromIfuRedirect.valid              := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
1127  fromIfuRedirect.bits.ftqIdx        := pdWb.bits.ftqIdx
1128  fromIfuRedirect.bits.ftqOffset     := pdWb.bits.misOffset.bits
1129  fromIfuRedirect.bits.level         := RedirectLevel.flushAfter
1130  fromIfuRedirect.bits.BTBMissBubble := true.B
1131  fromIfuRedirect.bits.debugIsMemVio := false.B
1132  fromIfuRedirect.bits.debugIsCtrl   := false.B
1133
1134  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
1135  ifuRedirectCfiUpdate.pc        := pdWb.bits.pc(pdWb.bits.misOffset.bits)
1136  ifuRedirectCfiUpdate.pd        := pdWb.bits.pd(pdWb.bits.misOffset.bits)
1137  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
1138  ifuRedirectCfiUpdate.target    := pdWb.bits.target
1139  ifuRedirectCfiUpdate.taken     := pdWb.bits.cfiOffset.valid
1140  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
1141
1142  val ifuRedirectReg   = RegNextWithEnable(fromIfuRedirect, hasInit = true)
1143  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
1144  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
1145
1146  ftq_redirect_mem.io.ren.get.head := fromIfuRedirect.valid
1147  ftq_redirect_mem.io.raddr.head   := fromIfuRedirect.bits.ftqIdx.value
1148
1149  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
1150  toBpuCfi.fromFtqRedirectSram(ftq_redirect_mem.io.rdata.head)
1151  when(ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
1152    toBpuCfi.target := toBpuCfi.topAddr
1153  }
1154
1155  when(ifuRedirectReg.valid) {
1156    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
1157  }.elsewhen(RegNext(pdWb.valid)) {
1158    // if pdWb and no redirect, set to false
1159    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
1160  }
1161
1162  // **********************************************************************
1163  // ***************************** to backend *****************************
1164  // **********************************************************************
1165  // to backend pc mem / target
1166  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
1167  io.toBackend.pc_mem_waddr := RegEnable(last_cycle_bpu_in_idx, last_cycle_bpu_in)
1168  io.toBackend.pc_mem_wdata := RegEnable(bpu_in_bypass_buf_for_ifu, last_cycle_bpu_in)
1169
1170  // the number of cycles is fixed
1171  val newest_entry_en: Bool = RegNext(last_cycle_bpu_in || backendRedirect.valid || ifuRedirectToBpu.valid)
1172  io.toBackend.newest_entry_en     := RegNext(newest_entry_en)
1173  io.toBackend.newest_entry_ptr    := RegEnable(newest_entry_ptr, newest_entry_en)
1174  io.toBackend.newest_entry_target := RegEnable(newest_entry_target, newest_entry_en)
1175
1176  // *********************************************************************
1177  // **************************** wb from exu ****************************
1178  // *********************************************************************
1179
1180  backendRedirect.valid := io.fromBackend.redirect.valid
1181  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
1182  backendRedirect.bits.BTBMissBubble := false.B
1183
1184  def extractRedirectInfo(wb: Valid[Redirect]) = {
1185    val ftqPtr    = wb.bits.ftqIdx
1186    val ftqOffset = wb.bits.ftqOffset
1187    val taken     = wb.bits.cfiUpdate.taken
1188    val mispred   = wb.bits.cfiUpdate.isMisPred
1189    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
1190  }
1191
1192  // fix mispredict entry
1193  val lastIsMispredict = RegNext(
1194    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter,
1195    init = false.B
1196  )
1197
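  // updateCfiInfo reconciles an entry's recorded cfiIndex with a redirect: a taken
  // redirect at an earlier offset rewrites both offset and valid, a redirect at the
  // recorded offset sets valid to its taken flag, and a not-taken redirect at any other
  // offset clears valid. Backend redirects additionally record the per-slot mispredict
  // bit, and newest_entry_target/ptr are refreshed with the redirect's target.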
1198  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
1199    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
1200    val r_idx                                          = r_ptr.value
1201    val cfiIndex_bits_wen                              = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
1202    val cfiIndex_valid_wen                             = r_valid && r_offset === cfiIndex_vec(r_idx).bits
1203    when(cfiIndex_bits_wen || cfiIndex_valid_wen) {
1204      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || (cfiIndex_valid_wen && r_taken)
1205    }.elsewhen(r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
1206      cfiIndex_vec(r_idx).valid := false.B
1207    }
1208    when(cfiIndex_bits_wen) {
1209      cfiIndex_vec(r_idx).bits := r_offset
1210    }
1211    newest_entry_target_modified := true.B
1212    newest_entry_target          := redirect.bits.cfiUpdate.target
1213    newest_entry_ptr_modified    := true.B
1214    newest_entry_ptr             := r_ptr
1215
1216    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1217    if (isBackend) {
1218      mispredict_vec(r_idx)(r_offset) := r_mispred
1219    }
1220  }
1221
1222  when(fromBackendRedirect.valid) {
1223    updateCfiInfo(fromBackendRedirect)
1224  }.elsewhen(ifuRedirectToBpu.valid) {
1225    updateCfiInfo(ifuRedirectToBpu, isBackend = false)
1226  }
1227
1228  when(fromBackendRedirect.valid) {
1229    when(fromBackendRedirect.bits.ControlRedirectBubble) {
1230      when(fromBackendRedirect.bits.ControlBTBMissBubble) {
1231        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1232        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1233      }.elsewhen(fromBackendRedirect.bits.TAGEMissBubble) {
1234        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id)                  := true.B
1235        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1236      }.elsewhen(fromBackendRedirect.bits.SCMissBubble) {
1237        topdown_stage.reasons(TopDownCounters.SCMissBubble.id)                  := true.B
1238        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1239      }.elsewhen(fromBackendRedirect.bits.ITTAGEMissBubble) {
1240        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id)                  := true.B
1241        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1242      }.elsewhen(fromBackendRedirect.bits.RASMissBubble) {
1243        topdown_stage.reasons(TopDownCounters.RASMissBubble.id)                  := true.B
1244        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1245      }
1246
1247    }.elsewhen(backendRedirect.bits.MemVioRedirectBubble) {
1248      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id)                  := true.B
1249      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1250    }.otherwise {
1251      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id)                  := true.B
1252      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1253    }
1254  }.elsewhen(ifuRedirectReg.valid) {
1255    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id)                  := true.B
1256    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1257  }
1258
1259  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1260  io.TAGEMissBubble       := fromBackendRedirect.bits.TAGEMissBubble
1261  io.SCMissBubble         := fromBackendRedirect.bits.SCMissBubble
1262  io.ITTAGEMissBubble     := fromBackendRedirect.bits.ITTAGEMissBubble
1263  io.RASMissBubble        := fromBackendRedirect.bits.RASMissBubble
1264
1265  // ***********************************************************************************
1266  // **************************** flush ptr and state queue ****************************
1267  // ***********************************************************************************
1268
1269  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1270
1271  // on a redirect, reset the pointers and status queues
1272  io.icacheFlush := redirectVec.map(r => r.valid).reduce(_ || _)
1273  XSPerfAccumulate("icacheFlushFromBackend", backendRedirect.valid)
1274  XSPerfAccumulate("icacheFlushFromIFU", fromIfuRedirect.valid)
1275  when(redirectVec.map(r => r.valid).reduce(_ || _)) {
1276    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1277    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1278    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1279    val next                       = idx + 1.U
1280    bpuPtr := next
1281    copied_bpu_ptr.map(_ := next)
1282    ifuPtr_write      := next
1283    ifuWbPtr_write    := next
1284    ifuPtrPlus1_write := idx + 2.U
1285    ifuPtrPlus2_write := idx + 3.U
1286    pfPtr_write       := next
1287    pfPtrPlus1_write  := idx + 2.U
1288  }
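  // The commit state queue is repaired one cycle later (all operands are RegNext-ed) and
  // only for backend (non-IFU) redirects: slots after the redirect offset become c_empty,
  // and the redirecting slot itself is marked c_flushed when the redirect flushes its own
  // instruction.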
1289  when(RegNext(redirectVec.map(r => r.valid).reduce(_ || _))) {
1290    val r                          = PriorityMux(redirectVec.map(r => r.valid -> r.bits))
1291    val notIfu                     = redirectVec.dropRight(1).map(r => r.valid).reduce(_ || _)
1292    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1293    when(RegNext(notIfu)) {
1294      commitStateQueueEnable(RegNext(idx.value)) := true.B
1295      commitStateQueueNext(RegNext(idx.value)).zipWithIndex.foreach { case (s, i) =>
1296        when(i.U > RegNext(offset)) {
1297          s := c_empty
1298        }
1299        when(i.U === RegNext(offset) && RegNext(flushItSelf)) {
1300          s := c_flushed
1301        }
1302      }
1303    }
1304  }
1305
1306  // only the valid bit is actually needed
1307  io.toIfu.redirect.bits    := backendRedirect.bits
1308  io.toIfu.redirect.valid   := stage2Flush
1309  io.toIfu.topdown_redirect := fromBackendRedirect
1310
1311  // commit
1312  for (c <- io.fromBackend.rob_commits) {
1313    when(c.valid) {
1314      commitStateQueueEnable(c.bits.ftqIdx.value)                 := true.B
1315      commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_committed
1316      // TODO: remove this
1317      // For instruction fusions, we also update the next instruction
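      // The commitType magic numbers below presumably mark fused instructions: 4 and 5
      // also commit the next one or two slots of the same entry, while 6 and 7 commit
      // slot 0 or 1 of the following FTQ entry.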
1318      when(c.bits.commitType === 4.U) {
1319        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_committed
1320      }.elsewhen(c.bits.commitType === 5.U) {
1321        commitStateQueueNext(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_committed
1322      }.elsewhen(c.bits.commitType === 6.U) {
1323        val index = (c.bits.ftqIdx + 1.U).value
1324        commitStateQueueEnable(index)  := true.B
1325        commitStateQueueNext(index)(0) := c_committed
1326      }.elsewhen(c.bits.commitType === 7.U) {
1327        val index = (c.bits.ftqIdx + 1.U).value
1328        commitStateQueueEnable(index)  := true.B
1329        commitStateQueueNext(index)(1) := c_committed
1330      }
1331    }
1332  }
1333
1334  // ****************************************************************
1335  // **************************** to bpu ****************************
1336  // ****************************************************************
1337
1338  io.toBpu.redirctFromIFU := ifuRedirectToBpu.valid
1339  io.toBpu.redirect       := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1340  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_ => 0.U(64.W)))
1341  val redirect_latency =
1342    GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1343  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1344  XSPerfHistogram(
1345    "ifu_redirect_latency",
1346    redirect_latency,
1347    !fromBackendRedirect.valid && ifuRedirectToBpu.valid,
1348    0,
1349    60,
1350    1
1351  )
1352
1353  XSError(
1354    io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr),
1355    "Ftq received a redirect after its commit, check backend or replay"
1356  )
1357
1358  val may_have_stall_from_bpu = Wire(Bool())
1359  val bpu_ftb_update_stall    = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1360  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
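  // While bpu_ftb_update_stall is non-zero, canCommit is held low; the counter is armed
  // by the state machine further below, presumably so the BPU can absorb an FTB update
  // that creates a new entry before the next one is issued.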
1361
1362  val validInstructions       = commitStateQueueReg(commPtr.value).map(s => s === c_toCommit || s === c_committed)
1363  val lastInstructionStatus   = PriorityMux(validInstructions.reverse.zip(commitStateQueueReg(commPtr.value).reverse))
1364  val firstInstructionFlushed = commitStateQueueReg(commPtr.value)(0) === c_flushed
1365  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1366    (isAfter(robCommPtr, commPtr) ||
1367      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed)
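  // canMoveCommPtr below is canCommit plus one extra case: an entry whose first
  // instruction was flushed by a redirect can be skipped over without ever producing a
  // BPU update (do_commit stays low for it).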
1368  val canMoveCommPtr = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1369    (isAfter(robCommPtr, commPtr) ||
1370      validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed ||
1371      firstInstructionFlushed)
1372
1373  when(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _)) {
1374    robCommPtr_write := ParallelPriorityMux(
1375      io.fromBackend.rob_commits.map(_.valid).reverse,
1376      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse
1377    )
1378  }.elsewhen(isAfter(commPtr, robCommPtr)) {
1379    robCommPtr_write := commPtr
1380  }.otherwise {
1381    robCommPtr_write := robCommPtr
1382  }
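  // robCommPtr records the newest ftqIdx committed by the ROB (the last valid commit
  // port wins) and is kept from falling behind commPtr, so the isAfter(robCommPtr,
  // commPtr) term above can release entries the backend has already fully committed.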
1383
1384  /**
1385    *************************************************************************************
1386    * MMIO instruction fetch is allowed only if MMIO is the oldest instruction.
1387    *************************************************************************************
1388    */
1389  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1390  val mmioLastCommit = isAfter(commPtr, mmioReadPtr) ||
1391    commPtr === mmioReadPtr && validInstructions.reduce(_ || _) && lastInstructionStatus === c_committed
1392  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1393
1394  // commit reads
1395  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
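  // commit_target: if the committing entry is the newest one, use the registered
  // newest_entry_target (the following entry may not have been written yet); otherwise
  // use the start address of the next FTQ entry.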
1396  val commit_target =
1397    Mux(
1398      RegNext(commPtr === newest_entry_ptr),
1399      RegEnable(newest_entry_target, newest_entry_target_modified),
1400      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr)
1401    )
1402  ftq_pd_mem.io.ren.get.last := canCommit
1403  ftq_pd_mem.io.raddr.last   := commPtr.value
1404  val commit_pd = ftq_pd_mem.io.rdata.last
1405  ftq_redirect_mem.io.ren.get.last := canCommit
1406  ftq_redirect_mem.io.raddr.last   := commPtr.value
1407  val commit_spec_meta = ftq_redirect_mem.io.rdata.last
1408  ftq_meta_1r_sram.io.ren(0)   := canCommit
1409  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1410  val commit_meta      = ftq_meta_1r_sram.io.rdata(0).meta
1411  val commit_ftb_entry = ftq_meta_1r_sram.io.rdata(0).ftb_entry
1412
1413  // reading the mem and SRAMs takes one cycle
1414  val do_commit_ptr = RegEnable(commPtr, canCommit)
1415  val do_commit     = RegNext(canCommit, init = false.B)
1416  when(canMoveCommPtr) {
1417    commPtr_write      := commPtrPlus1
1418    commPtrPlus1_write := commPtrPlus1 + 1.U
1419  }
1420  val commit_state   = RegEnable(commitStateQueueReg(commPtr.value), canCommit)
1421  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1422  val do_commit_cfi  = WireInit(cfiIndex_vec(do_commit_ptr.value))
1423  //
1424  // when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_committed) {
1425  //  can_commit_cfi.valid := false.B
1426  // }
1427  val commit_cfi = RegEnable(can_commit_cfi, canCommit)
1428  val debug_cfi  = commitStateQueueReg(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_committed && do_commit_cfi.valid
1429
1430  val commit_mispredict: Vec[Bool] =
1431    VecInit((RegEnable(mispredict_vec(commPtr.value), canCommit) zip commit_state).map {
1432      case (mis, state) => mis && state === c_committed
1433    })
1434  val commit_instCommitted: Vec[Bool] = VecInit(commit_state.map(_ === c_committed)) // [PredictWidth]
1435  val can_commit_hit     = entry_hit_status(commPtr.value)
1436  val commit_hit         = RegEnable(can_commit_hit, canCommit)
1437  val diff_commit_target = RegEnable(update_target(commPtr.value), canCommit) // TODO: remove this
1438  val commit_stage       = RegEnable(pred_stage(commPtr.value), canCommit)
1439  val commit_valid       = commit_hit === h_hit || commit_cfi.valid           // hit or taken
1440
1441  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
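  // bpu_ftb_update_stall is a down-counter: armed to 2 when a taken cfi commits on an
  // FTB miss (a new FTB entry must be generated), then 2 -> 1 -> 0; state 3 should be
  // unreachable.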
1442  switch(bpu_ftb_update_stall) {
1443    is(0.U) {
1444      when(can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1445        bpu_ftb_update_stall := 2.U // 2-cycle stall
1446      }
1447    }
1448    is(2.U) {
1449      bpu_ftb_update_stall := 1.U
1450    }
1451    is(1.U) {
1452      bpu_ftb_update_stall := 0.U
1453    }
1454    is(3.U) {
1455      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1456    }
1457  }
1458
1459  // TODO: remove this
1460  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1461
1462  // update latency stats
1463  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1464  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1465
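  // Assemble the BPU update packet from the commit_* values captured when canCommit
  // fired: pc, meta, cfi index, target, prediction stage and speculative info.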
1466  io.toBpu.update       := DontCare
1467  io.toBpu.update.valid := commit_valid && do_commit
1468  val update = io.toBpu.update.bits
1469  update.false_hit   := commit_hit === h_false_hit
1470  update.pc          := commit_pc_bundle.startAddr
1471  update.meta        := commit_meta
1472  update.cfi_idx     := commit_cfi
1473  update.full_target := commit_target
1474  update.from_stage  := commit_stage
1475  update.spec_info   := commit_spec_meta
1476  XSError(commit_valid && do_commit && debug_cfi, "\ncommitting cfi should be in c_committed state\n")
1477
1478  val commit_real_hit  = commit_hit === h_hit
1479  val update_ftb_entry = update.ftb_entry
1480
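  // FTBEntryGen folds the committed predecode info, cfi and target into either a brand
  // new FTB entry or a modified version of the old one; its outputs also provide the
  // new_br_insert_pos / mispred_mask / old_entry fields of the update below.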
1481  val ftbEntryGen = Module(new FTBEntryGen).io
1482  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1483  ftbEntryGen.old_entry      := commit_ftb_entry
1484  ftbEntryGen.pd             := commit_pd
1485  ftbEntryGen.cfiIndex       := commit_cfi
1486  ftbEntryGen.target         := commit_target
1487  ftbEntryGen.hit            := commit_real_hit
1488  ftbEntryGen.mispredict_vec := commit_mispredict
1489
1490  update_ftb_entry         := ftbEntryGen.new_entry
1491  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1492  update.mispred_mask      := ftbEntryGen.mispred_mask
1493  update.old_entry         := ftbEntryGen.is_old_entry
1494  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1495  update.br_taken_mask     := ftbEntryGen.taken_mask
1496  update.br_committed := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1497    case (valid, offset) => valid && commit_instCommitted(offset)
1498  }
1499  update.jmp_taken := ftbEntryGen.jmp_taken
1500
1501  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1502  // update.full_pred.jalr_target := commit_target
1503  // update.full_pred.hit := true.B
1504  // when (update.full_pred.is_jalr) {
1505  //   update.full_pred.targets.last := commit_target
1506  // }
1507
1508  // ******************************************************************************
1509  // **************************** commit perf counters ****************************
1510  // ******************************************************************************
1511
1512  val commit_inst_mask        = VecInit(commit_state.map(c => c === c_committed && do_commit)).asUInt
1513  val commit_mispred_mask     = commit_mispredict.asUInt
1514  val commit_not_mispred_mask = ~commit_mispred_mask
1515
1516  val commit_br_mask  = commit_pd.brMask.asUInt
1517  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1518  val commit_cfi_mask = commit_br_mask | commit_jmp_mask
1519
1520  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1521
1522  val mbpRights = mbpInstrs & commit_not_mispred_mask
1523  val mbpWrongs = mbpInstrs & commit_mispred_mask
1524
1525  io.bpuInfo.bpRight := PopCount(mbpRights)
1526  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1527
1528  val hartId           = p(XSCoreParamsKey).HartId
1529  val isWriteFTQTable  = Constantin.createRecord(s"isWriteFTQTable$hartId")
1530  val ftqBranchTraceDB = ChiselDB.createTable(s"FTQTable$hartId", new FtqDebugBundle)
1531  // Cfi Info
1532  for (i <- 0 until PredictWidth) {
1533    val pc      = commit_pc_bundle.startAddr + (i * instBytes).U
1534    val v       = commit_state(i) === c_committed
1535    val isBr    = commit_pd.brMask(i)
1536    val isJmp   = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1537    val isCfi   = isBr || isJmp
1538    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1539    val misPred = commit_mispredict(i)
1540    // val ghist = commit_spec_meta.ghist.predHist
1541    val histPtr   = commit_spec_meta.histPtr
1542    val predCycle = commit_meta(63, 0)
1543    val target    = commit_target
1544
1545    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1546      v && offset === i.U
1547    })))
1548    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map { case (v, offset) =>
1549      v && offset === i.U
1550    }.reduce(_ || _)
1551    val addIntoHist =
1552      ((commit_hit === h_hit) && inFtbEntry) || (!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1553    XSDebug(
1554      v && do_commit && isCfi,
1555      p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1556        p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1557        p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1558        p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n"
1559    )
1560
1561    val logbundle = Wire(new FtqDebugBundle)
1562    logbundle.pc        := pc
1563    logbundle.target    := target
1564    logbundle.isBr      := isBr
1565    logbundle.isJmp     := isJmp
1566    logbundle.isCall    := isJmp && commit_pd.hasCall
1567    logbundle.isRet     := isJmp && commit_pd.hasRet
1568    logbundle.misPred   := misPred
1569    logbundle.isTaken   := isTaken
1570    logbundle.predStage := commit_stage
1571
1572    ftqBranchTraceDB.log(
1573      data = logbundle /* hardware of type T */,
1574      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1575      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1576      clock = clock,
1577      reset = reset
1578    )
1579  }
1580
1581  val enq           = io.fromBpu.resp
1582  val perf_redirect = backendRedirect
1583
1584  XSPerfAccumulate("entry", validEntries)
1585  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1586  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1587  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1588  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1589
1590  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1591
1592  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1593  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1594  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1595  XSPerfAccumulate(
1596    "bpu_to_ifu_bubble_when_ftq_full",
1597    (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready
1598  )
1599
1600  XSPerfAccumulate("redirectAhead_ValidNum", ftqIdxAhead.map(_.valid).reduce(_ | _))
1601  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1602  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1603
1604  val from_bpu = io.fromBpu.resp.bits
1605  val to_ifu   = io.toIfu.req.bits
1606
1607  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth + 1, 1)
1608
1609  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1610  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1611  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1612  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1613
1614  val mbpBRights = mbpRights & commit_br_mask
1615  val mbpJRights = mbpRights & commit_jal_mask
1616  val mbpIRights = mbpRights & commit_jalr_mask
1617  val mbpCRights = mbpRights & commit_call_mask
1618  val mbpRRights = mbpRights & commit_ret_mask
1619
1620  val mbpBWrongs = mbpWrongs & commit_br_mask
1621  val mbpJWrongs = mbpWrongs & commit_jal_mask
1622  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1623  val mbpCWrongs = mbpWrongs & commit_call_mask
1624  val mbpRWrongs = mbpWrongs & commit_ret_mask
1625
1626  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1627
1628  def pred_stage_map(src: UInt, name: String) =
1629    (0 until numBpStages).map(i =>
1630      f"${name}_stage_${i + 1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1631    ).foldLeft(Map[String, UInt]())(_ + _)
1632
1633  val mispred_stage_map      = pred_stage_map(mbpWrongs, "mispredict")
1634  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1635  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1636  val correct_stage_map      = pred_stage_map(mbpRights, "correct")
1637  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1638  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1639
1640  val update_valid = io.toBpu.update.valid
1641  def u(cond: Bool) = update_valid && cond
1642  val ftb_false_hit = u(update.false_hit)
1643  // assert(!ftb_false_hit)
1644  val ftb_hit = u(commit_hit === h_hit)
1645
1646  val ftb_new_entry                = u(ftbEntryGen.is_init_entry)
1647  val ftb_new_entry_only_br        = ftb_new_entry && !update_ftb_entry.jmpValid
1648  val ftb_new_entry_only_jmp       = ftb_new_entry && !update_ftb_entry.brValids(0)
1649  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1650
1651  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1652
1653  val ftb_modified_entry =
1654    u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_strong_bias_modified)
1655  val ftb_modified_entry_new_br               = u(ftbEntryGen.is_new_br)
1656  val ftb_modified_entry_ifu_redirected       = u(ifuRedirected(do_commit_ptr.value))
1657  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1658  val ftb_modified_entry_br_full              = ftb_modified_entry && ftbEntryGen.is_br_full
1659  val ftb_modified_entry_strong_bias          = ftb_modified_entry && ftbEntryGen.is_strong_bias_modified
1660
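  // FTB entry length in instruction slots: (fallThrough - pc) >> instOffsetBits.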
1661  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1662  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1663  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth + 1, 1)
1664  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth + 1, 1)
1665  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1666  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth + 1, 1)
1667
1668  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize + 1, 1)
1669
1670  val perfCountsMap = Map(
1671    "BpInstr"                        -> PopCount(mbpInstrs),
1672    "BpBInstr"                       -> PopCount(mbpBRights | mbpBWrongs),
1673    "BpRight"                        -> PopCount(mbpRights),
1674    "BpWrong"                        -> PopCount(mbpWrongs),
1675    "BpBRight"                       -> PopCount(mbpBRights),
1676    "BpBWrong"                       -> PopCount(mbpBWrongs),
1677    "BpJRight"                       -> PopCount(mbpJRights),
1678    "BpJWrong"                       -> PopCount(mbpJWrongs),
1679    "BpIRight"                       -> PopCount(mbpIRights),
1680    "BpIWrong"                       -> PopCount(mbpIWrongs),
1681    "BpCRight"                       -> PopCount(mbpCRights),
1682    "BpCWrong"                       -> PopCount(mbpCWrongs),
1683    "BpRRight"                       -> PopCount(mbpRRights),
1684    "BpRWrong"                       -> PopCount(mbpRWrongs),
1685    "ftb_false_hit"                  -> PopCount(ftb_false_hit),
1686    "ftb_hit"                        -> PopCount(ftb_hit),
1687    "ftb_new_entry"                  -> PopCount(ftb_new_entry),
1688    "ftb_new_entry_only_br"          -> PopCount(ftb_new_entry_only_br),
1689    "ftb_new_entry_only_jmp"         -> PopCount(ftb_new_entry_only_jmp),
1690    "ftb_new_entry_has_br_and_jmp"   -> PopCount(ftb_new_entry_has_br_and_jmp),
1691    "ftb_old_entry"                  -> PopCount(ftb_old_entry),
1692    "ftb_modified_entry"             -> PopCount(ftb_modified_entry),
1693    "ftb_modified_entry_new_br"      -> PopCount(ftb_modified_entry_new_br),
1694    "ftb_jalr_target_modified"       -> PopCount(ftb_modified_entry_jalr_target_modified),
1695    "ftb_modified_entry_br_full"     -> PopCount(ftb_modified_entry_br_full),
1696    "ftb_modified_entry_strong_bias" -> PopCount(ftb_modified_entry_strong_bias)
1697  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1698    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1699
1700  for ((key, value) <- perfCountsMap) {
1701    XSPerfAccumulate(key, value)
1702  }
1703
1704  // --------------------------- Debug --------------------------------
1705  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1706  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1707  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1708  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1709  XSDebug(
1710    true.B,
1711    p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1712      p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n"
1713  )
1714  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1715
1716  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1717  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1718  //       case (((valid, pd), ans), taken) =>
1719  //       Mux(valid && pd.isBr,
1720  //         isWrong ^ Mux(ans.hit.asBool,
1721  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1722  //           !taken),
1723  //         !taken),
1724  //       false.B)
1725  //     }
1726  //   }
1727
1728  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1729  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1730  //       case (((valid, pd), ans), taken) =>
1731  //       Mux(valid && pd.isBr,
1732  //         isWrong ^ Mux(ans.hit.asBool,
1733  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1734  //           !taken),
1735  //         !taken),
1736  //       false.B)
1737  //     }
1738  //   }
1739
1740  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1741  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1742  //       case (((valid, pd), ans), taken) =>
1743  //       Mux(valid && pd.isBr,
1744  //         isWrong ^ (ans.taken.asBool === taken),
1745  //       false.B)
1746  //     }
1747  //   }
1748
1749  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1750  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1751  //       case (((valid, pd), ans), taken) =>
1752  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1753  //         isWrong ^ (!taken),
1754  //           false.B)
1755  //     }
1756  //   }
1757
1758  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1759  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1760  //       case (((valid, pd), ans), taken) =>
1761  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1762  //         isWrong ^ (ans.target === commitEntry.target),
1763  //           false.B)
1764  //     }
1765  //   }
1766
1767  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1768  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1769  //   // btb and ubtb pred jal and jalr as well
1770  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1771  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1772  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1773  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1774
1775  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1776  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1777
1778  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1779  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1780
1781  val perfEvents = Seq(
1782    ("bpu_s2_redirect        ", bpu_s2_redirect),
1783    ("bpu_s3_redirect        ", bpu_s3_redirect),
1784    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready),
1785    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1786    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
1787    ("predecodeRedirect      ", fromIfuRedirect.valid),
1788    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid),
1789    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn),
1790    ("BpInstr                ", PopCount(mbpInstrs)),
1791    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)),
1792    ("BpRight                ", PopCount(mbpRights)),
1793    ("BpWrong                ", PopCount(mbpWrongs)),
1794    ("BpBRight               ", PopCount(mbpBRights)),
1795    ("BpBWrong               ", PopCount(mbpBWrongs)),
1796    ("BpJRight               ", PopCount(mbpJRights)),
1797    ("BpJWrong               ", PopCount(mbpJWrongs)),
1798    ("BpIRight               ", PopCount(mbpIRights)),
1799    ("BpIWrong               ", PopCount(mbpIWrongs)),
1800    ("BpCRight               ", PopCount(mbpCRights)),
1801    ("BpCWrong               ", PopCount(mbpCWrongs)),
1802    ("BpRRight               ", PopCount(mbpRRights)),
1803    ("BpRWrong               ", PopCount(mbpRWrongs)),
1804    ("ftb_false_hit          ", PopCount(ftb_false_hit)),
1805    ("ftb_hit                ", PopCount(ftb_hit))
1806  )
1807  generatePerfEvent()
1808}
1809