/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
import xiangshan._
import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
import xiangshan.backend.{CtrlToFtqIO}
import firrtl.annotations.MemoryLoadFileType

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
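
// Note (added for illustration): an FtqPtr is a circular queue pointer, a
// (flag, value) pair in which `flag` flips every time `value` wraps around
// FtqSize. The comparison helpers from HasCircularQueuePtrHelper use both
// fields, e.g. assuming FtqSize = 64:
//   val a = FtqPtr(false.B, 63.U)
//   val b = a + 1.U   // value wraps to 0 and flag flips to true
//   isAfter(b, a)     // true.B, even though b.value < a.value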

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
  }
  def fallThroughError() = {
    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr := resp.ftb_entry.pftAddr
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.ftb_entry.oversize
    this.carry := resp.ftb_entry.carry
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}
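
// Worked example for getPc (illustrative, assuming PredictWidth = 16,
// instOffsetBits = 1 and FetchWidth = 8): with startAddr = 0x8000_001c,
// getOffset(startAddr) is slot 14 of the 32-byte-aligned region. For
// offset = 5.U, slot 14 + 5 = 19 wraps to slot 3 and carries out, so
// isNextMask(5) is set and the higher bits are taken from nextRangeAddr
// (startAddr + 32 bytes): getPc(5.U) returns 0x8000_0026, i.e. startAddr
// plus five 2-byte instruction slots.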

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}
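
// Encoding note (added for illustration): toPd builds brType as
// Cat(isJmp, isJalr || isBr), which gives
//   b00 -> not a CFI    b01 -> conditional branch
//   b10 -> jal          b11 -> jalr
// and jmpInfo.bits packs (isJalr, isCall, isRet) into bits (0, 1, 2), which
// is exactly what the hasJal/hasJalr/hasCall/hasRet helpers above decode.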



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}
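
// Read-port layout implied by the accessors above: pc_reads(0) is the jump-PC
// read, pc_reads(1) to pc_reads(numRedirect) are the redirect reads,
// pc_reads(numRedirect + 1) is the memory-dependence-predictor read, and the
// last port is the ROB-flush PC read.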


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi
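
  // Worked example (illustrative, assuming PredictWidth = 16 and
  // instOffsetBits = 1, so carryPos = 6): for start_addr = 0x8000_003a,
  // getLower yields bits (5,1) = 0b11101. With a 4-byte jump at jmpOffset = 2,
  // jmpPft = 0b11101 +& 2 +& 2 = 0b100001: the low five bits (0b00001) become
  // pftAddr and the extra top bit sets carry, i.e. the fall-through address
  // lies in the next 64-byte-aligned region.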

  // if hit, check whether a new cfi (only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
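
  // Example (illustrative, numBr = 2): if the old entry holds branches at
  // offsets {4, 9} and predecode reports a newly taken branch at offset 6,
  // only bit 1 of the onehot is set, so the new branch takes slot 1. Since no
  // slot remains for the old offset-9 branch, the pft_need_to_change logic
  // below shortens pftAddr so that the modified entry ends at offset 9.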

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))

  toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  toIfuReq.bits.ftqIdx := ifuPtr
  toIfuReq.bits.target := update_target(ifuPtr.value)
  toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
  toIfuReq.bits.fallThruError  := false.B

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }
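
  // Bypass selection note (illustrative): the three cases above pick the
  // freshest PC bundle for ifuPtr. If BPU wrote ftq_pc_mem last cycle at the
  // very entry ifuPtr points to, the synchronous read would return stale data,
  // so the bypass buffer wins. Otherwise, if a request fired last cycle,
  // ifuPtr has just advanced and last cycle's (ifuPtr + 1) read port already
  // holds the right entry; failing both, ifuPtr is unchanged and its own read
  // port from last cycle is used.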

  io.toIfu.req <> toIfuReq

  // when the fall-through address is smaller than the start address, there must be a false hit
  when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
    }
    io.toIfu.req.bits.fallThruAddr   := toIfuReq.bits.startAddr + (FetchWidth*4).U
    io.toIfu.req.bits.fallThruError  := true.B
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
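
  // Mask arithmetic example (illustrative, assuming PredictWidth = 16): for a
  // valid jump at jmpOffset = 3, commit_jmp_mask = UIntToOH(3.U) & Fill(16, 1.U)
  // = 0x0008. OR-ed with the committed-branch mask it marks one bit per CFI
  // slot, so mbpInstrs below selects exactly the committed control-flow
  // instructions.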

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}