xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision dcbc69cb2a7ea07707ede3d8f7c74421ef450202)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, ParallelPriorityEncoder, ParallelPriorityMux, PerfBundle, PerfEventsBundle, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSError, XSPerfAccumulate}
import xiangshan._
import xiangshan.backend.CtrlToFtqIO

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

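// Provides numRead independent read ports by instantiating numRead copies of
// a single-ported SRAM; every copy is written with the same data on a write,
// so each read port sees a consistent view (an area-for-bandwidth trade-off).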
class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher  = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, pftAddr, 0.U(instOffsetBits.W))
  }
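  // Sanity check on the stored fall-through: only the low bits of the
  // fall-through address are kept (pftAddr plus a carry bit), so they are
  // compared against the low bits of startAddr. The fall-through must lie
  // strictly behind the start address and at most PredictWidth+1 instruction
  // slots ahead; anything else indicates a corrupt (falsely hit) entry.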
  def fallThroughError() = {
    val startLower        = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
    val endLowerwithCarry = Cat(carry,    pftAddr)
    require(startLower.getWidth == log2Ceil(PredictWidth)+2)
    require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
    startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
    this.pftAddr :=
      Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
        resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := Mux(resp.preds.hit, resp.ftb_entry.oversize, false.B)
    this.carry := Mux(resp.preds.hit, resp.ftb_entry.carry, resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
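    // the 2-bit brType below encodes: 00 = non-CFI, 01 = branch, 10 = jal,
    // 11 = jalr (high bit: is a jump, low bit: jalr or conditional branch)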
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet to be flushed by bpu s3 can have reached at most ifu f1
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
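  // layout of pc_reads (1 + numRedirect + 1 + 1 ports), see the getters below:
  //   head                     -> jump pc
  //   next numRedirect entries -> redirect pcs
  //   second to last           -> mem-dependence predictor pc (loadPred)
  //   last                     -> rob flush pc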
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
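  // pftAddr keeps only the low (carryPos - instOffsetBits) bits of the
  // fall-through address; together with the carry bit the full address can
  // be rebuilt from startAddr later (see Ftq_RF_Components.getFallThrough)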
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

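  // e.g., assuming numBr = 2 and an old entry with brs at offsets {2, 7}:
  // a new br at offset 5 sets onehot(1) (inserted before old br(1)), and a
  // new br at offset 1 sets onehot(0) (inserted before old br(0)); the loop
  // below then shifts the displaced brs towards the tail slot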
  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value
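  // an entry is allocated at bpuPtr when the s1 response fires; if s2/s3
  // later redirect, their responses carry the original ftq_idx and simply
  // overwrite that entry, hence bpuPtr is used only for the s1 response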

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // write the selected bpu resp (s1 by default; s2/s3 when they redirect)
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }
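  // three sources for the pc bundle sent to ifu:
  // 1. the entry was written by bpu in the last cycle -> take the bypass buffer
  // 2. ifuPtr advanced last cycle -> take the pre-read data of (old ifuPtr)+1,
  //    which is the current ifuPtr
  // 3. ifuPtr stayed -> take the pre-read data of ifuPtr itself
  // (the two raddrs above always read ifuPtr and ifuPtr+1, so after the
  // one-cycle read latency one of them matches the current ifuPtr)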

  // when the fall-through address is smaller than the start address, there must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // the redirect reads cfiInfo here; this read couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }
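  // roughly: shift is how many history bits this entry contributed up to the
  // redirected cfi, and addIntoHist marks whether the redirected br itself is
  // one of them; with an ftb hit these are derived from the stored entry
  // (saved brs, plus a new br if it could still be inserted), while on a miss
  // only a taken br can have contributed a single bit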


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
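  // update rules for the recorded cfi of an entry, as read from the logic
  // above: a taken redirect at an earlier offset than the recorded one
  // overrides it (cfiIndex_bits_wen); a redirect at exactly the recorded
  // offset re-evaluates the valid bit with the new taken flag
  // (cfiIndex_valid_wen); mispredict info is recorded only for backend
  // redirects, since ifu redirects come from predecode, not execution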

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || (i.U === offset && flushItSelf)){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()
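  // an entry can commit once ifu has written it back (commPtr =/= ifuWbPtr)
  // and every instruction slot in it is either committed by the rob or was
  // never valid; may_have_stall_from_bpu additionally delays the commit by
  // one cycle in the missed-entry case (see its assignment below)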

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu
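  // if the entry at commPtr has a taken cfi but was not hit in the ftb, its
  // last-stage (s3) info from bpu may not have been written into the srams
  // yet, so commit of such an entry is delayed by one extra cycle (note the
  // register toggles back in the following cycle)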

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}