xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision a273862e37f1d43bee748f2a6353320a2f52f6f4)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, ParallelPriorityEncoder, ParallelPriorityMux, PerfBundle, PerfEventsBundle, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSError, XSPerfAccumulate}
import xiangshan._
import xiangshan.backend.CtrlToFtqIO
import firrtl.annotations.MemoryLoadFileType

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
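
// Added commentary, a rough usage sketch (not from the original source):
// FtqPtr is a wrapping queue pointer whose `flag` flips on every wrap-around,
// so relative age can be decided even after the raw value has wrapped.
// Assuming FtqSize = 64:
//   val a = FtqPtr(false.B, 63.U)
//   val b = a + 1.U  // value wraps to 0, flag flips to true
//   isAfter(b, a)    // true: b is logically newer, despite b.value < a.value
// FtqPtr.inverse flips only the flag, i.e. "the same slot, one lap apart".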
46
47class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
48
49  val io = IO(new Bundle() {
50    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
51    val ren = Input(Vec(numRead, Bool()))
52    val rdata = Output(Vec(numRead, gen))
53    val waddr = Input(UInt(log2Up(FtqSize).W))
54    val wen = Input(Bool())
55    val wdata = Input(gen)
56  })
57
58  for(i <- 0 until numRead){
59    val sram = Module(new SRAMTemplate(gen, FtqSize))
60    sram.io.r.req.valid := io.ren(i)
61    sram.io.r.req.bits.setIdx := io.raddr(i)
62    io.rdata(i) := sram.io.r.resp.data(0)
63    sram.io.w.req.valid := io.wen
64    sram.io.w.req.bits.setIdx := io.waddr
65    sram.io.w.req.bits.data := VecInit(io.wdata)
66  }
67
68}
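
// Added commentary: each read port gets its own single-ported SRAM copy, and
// every write is broadcast to all copies, trading area for N independent
// reads. A minimal instantiation sketch, assuming a 2-read-port use:
//   val sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, numRead = 2))
//   sram.io.ren(0) := someReadFire      // someReadFire/somePtr are
//   sram.io.raddr(0) := somePtr.value   // hypothetical signals for the sketch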

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
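  // Added worked example (assuming PredictWidth = 16 and instOffsetBits = 1,
  // i.e. 2-byte instruction slots): for startAddr = 0x80000000 and offset = 3,
  // getOffset(startAddr) = 0, so the reconstructed pc is
  // Cat(upper bits of 0x80000000, 0 + 3, 0b0) = 0x80000006. When the slot
  // wraps past the fetch block, isNextMask(offset) selects the upper bits of
  // nextRangeAddr instead of startAddr.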
  def getFallThrough() = {
    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
  }
  def fallThroughError() = {
    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr := resp.ftb_entry.pftAddr
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.ftb_entry.oversize
    this.carry := resp.ftb_entry.carry
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
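    // Added commentary on the 2-bit encoding built above: the high bit marks
    // a jump at this offset, the low bit marks jalr-or-branch, which appears
    // to yield notCFI = 00, branch = 01, jal = 10, jalr = 11.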
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}

class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  val ghist = new GlobalHistory
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    this.ghist := resp.ghist
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val startAddr = UInt(VAddrBits.W)
  val fallThruAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())

  val meta = UInt(MaxMetaLength.W)

  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val hist = new GlobalHistory
  val specCnt = Vec(numBr, UInt(10.W))

  val valids = Vec(PredictWidth, Bool())
  val brMask = Vec(PredictWidth, Bool())
  // isJalr, isCall, isRet
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)

  val mispredVec = Vec(PredictWidth, Bool())
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
  val target = UInt(VAddrBits.W)
}

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
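    // Added commentary: a stage redirect flushes every in-flight request
    // whose ftqIdx is not older than the redirecting one. E.g. if s2
    // redirects at ftq_idx 5, requests with ftqIdx 5 or 6 are flushed, while
    // one with ftqIdx 4 is kept.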
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
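  // Added commentary on the read-port layout, matching the getters below:
  // index 0 is the jump pc read, indices 1 to numRedirect serve redirects,
  // index numRedirect+1 is the memory-violation (load replay) pc read, and
  // the last index is the rob-flush pc read.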
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}

class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))

  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
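  // Added commentary: pftAddr keeps only the low bits
  // (carryPos-1 downto instOffsetBits) of the partial fall-through address,
  // and carry records the overflow into the next aligned region, so the full
  // fall-through pc can be rebuilt from startAddr later (see getFallThroughAddr
  // in Ftq_RF_Components.getFallThrough).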
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
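  // Added example: with numBr = 2 and old brs recorded at offsets {3, 8}, a
  // newly detected br at offset 5 yields new_br_insert_onehot = (false, true):
  // it takes slot 1, the offset-8 br is pushed out, and (in the pft fixup
  // further below) the entry's fall-through is cut down to offset 8.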

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
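  // Added commentary: concretely, if no slot accepted the new br
  // (new_br_insert_onehot is all-zero) the new br itself becomes the
  // fall-through point; otherwise the displaced last br does, matching the
  // Mux on new_pft_offset below.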
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)

  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))

  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
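  // Added commentary on how the read ports are carved up later in this file:
  // ports 0 to numRedirect+2 back the io.toBackend.pc_reads vector, the next
  // two (raddr.init.init.last and raddr.init.last) serve the two ifu request
  // reads, and the last port serves the commit-time update read.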
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry

  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }

  io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))

  toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  toIfuReq.bits.ftqIdx := ifuPtr
  toIfuReq.bits.target := update_target(ifuPtr.value)
  toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
  toIfuReq.bits.fallThruError  := false.B

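  // Added commentary: the pc bundle is chosen from three sources, since the
  // SyncDataModule read takes one cycle. If bpu wrote this very entry last
  // cycle, the bypass buffer is newest; otherwise, if a request fired last
  // cycle, ifuPtr has advanced, so last cycle's (ifuPtr+1) read port now
  // holds the current entry; else the plain ifuPtr read port does.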
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  io.toIfu.req <> toIfuReq

  // when the fall-through address is smaller in value than the start address, there must be a false hit
  when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
    }
    io.toIfu.req.bits.fallThruAddr   := toIfuReq.bits.startAddr + (FetchWidth*4).U
    io.toIfu.req.bits.fallThruError  := true.B
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }

  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }

  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
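  // Added commentary: a taken redirect at a smaller offset than the recorded
  // cfi overwrites it (the earlier cfi wins within an entry), while a
  // redirect at exactly the recorded offset just re-evaluates the valid bit
  // with the resolved taken flag.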

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect := DontCare
  io.toIfu.redirect.valid := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    val ghist = commit_spec_meta.ghist.predHist
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
      p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${Hexadecimal(ghist)}) " +
      p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
      p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)

  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)

  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))

  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && !enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}
1246