/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
import xiangshan._
import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
import xiangshan.backend.{CtrlToFtqIO}
import firrtl.annotations.MemoryLoadFileType

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
  }
  def fallThroughError() = {
    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
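    // The fetch block may extend past the next PredictWidth-aligned boundary, so a
    // second base address is recorded here; getPc() above selects nextRangeAddr
    // (instead of startAddr) for the offsets beyond that boundary, as marked by isNextMask.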
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr := resp.ftb_entry.pftAddr
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.ftb_entry.oversize
    this.carry := resp.ftb_entry.carry
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
      pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when ifu pipeline is not stalled,
    // a packet from bpu s3 can reach f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
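  // Classify the control-flow information written back for this FTQ entry: whether the
  // taken cfi is a branch, and what kind of jump (if any) the predecoded block contains.
  // These flags drive how the new or updated FTB entry is generated below.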
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp && pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp && pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret = entry_has_jmp && pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi(only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //    the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports: jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  // ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  // ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

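  // The request sent to IFU is assembled from one of three sources, depending on timing:
  // (1) the bypass buffer, when the entry at ifuPtr was written by BPU last cycle,
  // (2) the ftq_pc_mem read at ifuPtr+1, when a request fired last cycle and ifuPtr has
  //     just advanced, or (3) the ftq_pc_mem read at ifuPtr otherwise.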
  val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))

  toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  toIfuReq.bits.ftqIdx := ifuPtr
  toIfuReq.bits.target := update_target(ifuPtr.value)
  toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
  toIfuReq.bits.fallThruError := false.B

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  io.toIfu.req <> toIfuReq

  // when fall through is smaller in value than start address, there must be a false hit
  when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
    }
    io.toIfu.req.bits.fallThruAddr := toIfuReq.bits.startAddr + (FetchWidth*4).U
    io.toIfu.req.bits.fallThruError := true.B
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports: commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg = RegEnable(pds, enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg = RegEnable(ifu_wb_idx, enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
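  // Collect the three redirect sources in priority order: rob flush, backend stage-2
  // redirect, then the ifu (predecode) redirect. PriorityMux below picks the
  // highest-priority valid one to recover bpuPtr/ifuPtr/ifuWbPtr and the commit state queue.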
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit := commit_hit === h_false_hit
  update.pc := commit_pc_bundle.startAddr
  update.preds.hit := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry := commit_ftb_entry
  ftbEntryGen.pd := commit_pd
  ftbEntryGen.cfiIndex := commit_cfi
  ftbEntryGen.target := commit_target
  ftbEntryGen.hit := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask := ftbEntryGen.mispred_mask
  update.old_entry := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
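    // Per-slot commit trace: one debug line per committed cfi, recording the prediction
    // outcome, history pointer and target, for analysing branch predictor behavior.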
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
      p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
      p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
      p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map = pred_stage_map(mbpWrongs, "mispredict")
  val br_mispred_stage_map = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map = pred_stage_map(mbpRights, "correct")
  val br_correct_stage_map = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight" -> PopCount(mbpRights),
    "BpWrong" -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit" -> PopCount(ftb_false_hit),
    "ftb_hit" -> PopCount(ftb_hit),
    "ftb_new_entry" -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br" -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp" -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry" -> PopCount(ftb_old_entry),
    "ftb_modified_entry" -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br" -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified" -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full" -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
    s2_entry_len_map ++ s3_entry_len_map ++
    to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
    mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
    correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  // def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //             !taken),
  //           !taken),
  //         false.B)
  //   }
  // }

  // def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //             !taken),
  //           !taken),
  //         false.B)
  //   }
  // }

  // def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //         false.B)
  //   }
  // }

  // def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //         false.B)
  //   }
  // }

  // def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //         false.B)
  //   }
  // }
  // val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  // val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  // // btb and ubtb pred jal and jalr as well
  // val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  // val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  // val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  // val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  // val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  // val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  // val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  // val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect      ", bpu_s2_redirect),
    ("bpu_s3_redirect      ", bpu_s3_redirect),
    ("bpu_to_ftq_stall     ", enq.valid && ~enq.ready),
    ("mispredictRedirect   ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect       ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)),
    ("predecodeRedirect    ", fromIfuRedirect.valid),
    ("to_ifu_bubble        ", io.toIfu.req.ready && !io.toIfu.req.valid),
    ("from_bpu_real_bubble ", !enq.valid && enq.ready && allowBpuIn),
    ("BpInstr              ", PopCount(mbpInstrs)),
    ("BpBInstr             ", PopCount(mbpBRights | mbpBWrongs)),
    ("BpRight              ", PopCount(mbpRights)),
    ("BpWrong              ", PopCount(mbpWrongs)),
    ("BpBRight             ", PopCount(mbpBRights)),
    ("BpBWrong             ", PopCount(mbpBWrongs)),
    ("BpJRight             ", PopCount(mbpJRights)),
    ("BpJWrong             ", PopCount(mbpJWrongs)),
    ("BpIRight             ", PopCount(mbpIRights)),
    ("BpIWrong             ", PopCount(mbpIWrongs)),
    ("BpCRight             ", PopCount(mbpCRights)),
    ("BpCWrong             ", PopCount(mbpCWrongs)),
    ("BpRRight             ", PopCount(mbpRRights)),
    ("BpRWrong             ", PopCount(mbpRWrongs)),
    ("ftb_false_hit        ", PopCount(ftb_false_hit)),
    ("ftb_hit              ", PopCount(ftb_hit)),
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}