/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
import xiangshan._
import scala.tools.nsc.doc.model.Val
import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
import xiangshan.backend.{CtrlToFtqIO}
import firrtl.annotations.MemoryLoadFileType

/** Circular-queue pointer into the FTQ; the queue depth is read from `XSCoreParamsKey.FtqSize`. */
class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  /** Builds a `Wire(new FtqPtr)` whose `flag` is driven by `f` and whose `value` is driven by `v`. */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }

  /** Returns a pointer with the same `value` as `ptr` but the opposite `flag`. */
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

/**
  * Memory with `numRead` read ports and one write port, built from `numRead` separate
  * `SRAMTemplate(gen, FtqSize)` instances. Every instance receives the same write
  * (`wen`/`waddr`/`wdata`); instance `i` serves read port `i`.
  *
  * @param gen     element type stored per FTQ index
  * @param numRead number of read ports (and therefore of SRAM instances)
  */
class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    // read port i
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    // the single write port is broadcast to every copy
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

/**
  * Per-entry address information from which the PC of any instruction slot in the
  * fetch block, and the block's fall-through address, can be rebuilt.
  */
class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()

  /**
    * Rebuilds the PC of the instruction at slot `offset`.
    * Upper bits come from `nextRangeAddr` when `isNextMask(offset)` is set, otherwise from
    * `startAddr`; the slot-index bits are `startAddr`'s slot index plus `offset`; the lowest
    * `instOffsetBits` bits are zero.
    */
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }

  /** Fall-through address, computed by `getFallThroughAddr` from `startAddr`, `carry` and `pftAddr`. */
  def getFallThrough() = {
    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
  }

  /** True when `carry` is clear and the start address's slot bits are greater than `pftAddr`. */
  def fallThroughError() = {
    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
  }

  /** Drives every field of this bundle from a branch prediction response and returns `this`. */
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr := resp.ftb_entry.pftAddr
    // isNextMask(i) is the carry out of (slot index of pc) + i.
    // The slot index is sliced with instOffsetBits, exactly as getPc does, instead of a
    // hard-coded bit 1; the two are identical whenever instOffsetBits == 1.
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.ftb_entry.oversize
    this.carry := resp.ftb_entry.carry
    this
  }

  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

/**
  * Compressed predecode information for one FTQ entry: a branch mask, an RVC mask and the
  * description of the first valid jal/jalr in the block.
  * `jmpInfo.bits` holds (isJalr, isCall, isRet) in that order.
  */
class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  /** Drives this bundle from a predecode writeback. */
  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    // one "valid jal/jalr" flag per slot, shared by the three users below
    val jmpValids = pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(jmpValids).asUInt.orR
    // priority selection: the lowest-indexed valid jump wins
    this.jmpInfo.bits := ParallelPriorityMux(jmpValids,
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(jmpValids)
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  /**
    * Expands the stored information back into a `PreDecodeInfo` for slot `offset`.
    * `offset` must be exactly `log2Ceil(PredictWidth)` bits wide.
    */
  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    // the slot is the recorded jump of this entry
    val isJmp = offset === jmpOffset && jmpInfo.valid
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = isJmp && jmpInfo.bits(0)
    pd.brType := Cat(isJmp, isJalr || isBr)
    pd.isCall := isJmp && jmpInfo.bits(1)
    pd.isRet := isJmp && jmpInfo.bits(2)
    pd
  }
}



/** Predictor state captured per entry (RAS, speculative counters, histories). */
class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  val ghist = new GlobalHistory
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  /** Drives every field from a branch prediction response and returns `this`. */
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    this.ghist := resp.ghist
    this.phist := resp.phist
    // lowest PC bit above the instruction-alignment bits
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

/** Predictor meta data, `MaxMetaLength` bits wide. */
class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

/** Target address together with an optionally valid CFI slot index. */
class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

/** Bundle grouping address, predictor, predecode and CFI-result fields of one FTQ entry. */
class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val startAddr = UInt(VAddrBits.W)
  val fallThruAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())

  val meta = UInt(MaxMetaLength.W)

  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val hist = new GlobalHistory
  val specCnt = Vec(numBr, UInt(10.W))

  val valids = Vec(PredictWidth, Bool())
  val brMask = Vec(PredictWidth, Bool())
  // isJalr, isCall, isRet
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)

  val mispredVec = Vec(PredictWidth, Bool())
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
  val target = UInt(VAddrBits.W)
}

/**
  * Read-port bundle: `ptr` and `offset` are outputs of the requester, `data` is its input.
  *
  * @param gen type of the data returned by the read
  */
class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)

  /** Drives `ptr`/`offset` of this port and returns its `data` field. */
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


/** FTQ -> BPU interface: redirect, update and the current enqueue pointer. */
class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

/** FTQ -> IFU interface: fetch requests, redirects and flush pointers from BPU stages 2 and 3. */
class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when ifu pipeline is not stalled,
    // a packet from bpu s3 can reach f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    /** True when `src` is valid and `src.bits` is not after `idx_to_flush`. */
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

/** Helpers describing the redirect sources of the backend. */
trait HasBackendRedirectInfo extends HasXSParameter {
  /** Number of redirect sources: one per jump unit, one per ALU, plus one. */
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

/**
  * FTQ -> backend read ports. `pc_reads` is laid out as
  * [jump] ++ [numRedirect redirect ports] ++ [memPred] ++ [robFlush];
  * the accessors below select the respective slices.
  */
class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}


/**
  * Combinational generator of the FTB entry to be written for a fetch block.
  *
  * When `io.hit` is low a fresh entry (`init_entry`) is built from the predecode information.
  * When `io.hit` is high the result is derived from `io.old_entry`, in priority order:
  *   1. a newly detected branch is inserted (`old_entry_modified`),
  *   2. the jalr target is replaced (`old_entry_jmp_target_modified`),
  *   3. otherwise only the `always_taken` bits are refreshed (`old_entry_always_taken`).
  *
  * The remaining outputs are masks derived from the chosen entry plus flags for perf counters.
  */
class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  // every field defaults to zero; the assignments below override selected fields
  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  // pd.jmpInfo.bits is (isJalr, isCall, isRet)
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  // the jump / the taken branch sits in the last slot and is not RVC
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  // slot index just past the jump: +1 for an RVC jump, +2 otherwise
  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi(only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    case 0 =>
      !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
    case idx =>
      allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
      (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
  })

  // starts as a copy of the old entry; the when-chains below override individual fields
  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          // NOTE(review): the slot contents are taken from old slot i-1 while always_taken(i)
          // keeps the value of old index i - confirm whether oe.always_taken(i-1) was intended.
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    // a branch stays "always taken" only if it is the valid, recorded branch taken this time
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

// NOTE(review): the text of `class Ftq` on the next line is carried over unchanged, still in
// its collapsed form with fused line-number residue; it needs the same clean-up as above.
431 432class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper 433 with HasBackendRedirectInfo with BPUUtils with HasBPUConst { 434 val io = IO(new Bundle { 435 val fromBpu = Flipped(new BpuToFtqIO) 436 val fromIfu = Flipped(new IfuToFtqIO) 437 val fromBackend = 
Flipped(new CtrlToFtqIO) 438 439 val toBpu = new FtqToBpuIO 440 val toIfu = new FtqToIfuIO 441 val toBackend = new FtqToCtrlIO 442 443 val bpuInfo = new Bundle { 444 val bpRight = Output(UInt(XLEN.W)) 445 val bpWrong = Output(UInt(XLEN.W)) 446 } 447 }) 448 io.bpuInfo := DontCare 449 450 val robFlush = io.fromBackend.robFlush 451 val stage2Redirect = io.fromBackend.stage2Redirect 452 val stage3Redirect = io.fromBackend.stage3Redirect 453 454 val stage2Flush = stage2Redirect.valid || robFlush.valid 455 val backendFlush = stage2Flush || RegNext(stage2Flush) 456 val ifuFlush = Wire(Bool()) 457 458 val flush = stage2Flush || RegNext(stage2Flush) 459 460 val allowBpuIn, allowToIfu = WireInit(false.B) 461 val flushToIfu = !allowToIfu 462 allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid 463 allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid 464 465 val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U)) 466 val validEntries = distanceBetween(bpuPtr, commPtr) 467 468 // ********************************************************************** 469 // **************************** enq from bpu **************************** 470 // ********************************************************************** 471 val new_entry_ready = validEntries < FtqSize.U 472 io.fromBpu.resp.ready := new_entry_ready 473 474 val bpu_s2_resp = io.fromBpu.resp.bits.s2 475 val bpu_s3_resp = io.fromBpu.resp.bits.s3 476 val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect 477 val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect 478 479 io.toBpu.enq_ptr := bpuPtr 480 val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1 481 val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn 482 483 val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp) 484 val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx) 485 val 
bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx) 486 val bpu_in_resp_idx = bpu_in_resp_ptr.value 487 488 // read ports: jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate 489 val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1)) 490 // resp from uBTB 491 ftq_pc_mem.io.wen(0) := bpu_in_fire 492 ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx 493 ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp) 494 495 // ifuRedirect + backendRedirect + commit 496 val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1)) 497 // these info is intended to enq at the last stage of bpu 498 ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid 499 ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value 500 ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage) 501 502 val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1)) 503 // these info is intended to enq at the last stage of bpu 504 ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid 505 ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value 506 ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta 507 // ifuRedirect + backendRedirect + commit 508 val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1)) 509 ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid 510 ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value 511 ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry 512 513 514 // multi-write 515 val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) 516 val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)))) 517 val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool()))) 518 val pred_stage = Reg(Vec(FtqSize, UInt(2.W))) 519 520 val c_invalid :: c_valid :: c_commited :: Nil = 
Enum(3) 521 val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) { 522 VecInit(Seq.fill(PredictWidth)(c_invalid)) 523 })) 524 525 val f_to_send :: f_sent :: Nil = Enum(2) 526 val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent))) 527 528 val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3) 529 val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit))) 530 531 532 when (bpu_in_fire) { 533 entry_fetch_status(bpu_in_resp_idx) := f_to_send 534 commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid)) 535 cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex 536 mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B))) 537 update_target(bpu_in_resp_idx) := bpu_in_resp.target 538 pred_stage(bpu_in_resp_idx) := bpu_in_stage 539 } 540 541 bpuPtr := bpuPtr + enq_fire 542 ifuPtr := ifuPtr + io.toIfu.req.fire 543 544 // only use ftb result to assign hit status 545 when (bpu_s2_resp.valid) { 546 entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit) 547 } 548 549 550 io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect 551 io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx 552 when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) { 553 bpuPtr := bpu_s2_resp.ftq_idx + 1.U 554 // only when ifuPtr runs ahead of bpu s2 resp should we recover it 555 when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) { 556 ifuPtr := bpu_s2_resp.ftq_idx 557 } 558 } 559 560 io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect 561 io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx 562 when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) { 563 bpuPtr := bpu_s3_resp.ftq_idx + 1.U 564 // only when ifuPtr runs ahead of bpu s2 resp should we recover it 565 when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) { 566 ifuPtr := bpu_s3_resp.ftq_idx 567 } 568 XSError(true.B, "\ns3_redirect mechanism not implemented!\n") 569 } 570 571 
XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n") 572 573 // **************************************************************** 574 // **************************** to ifu **************************** 575 // **************************************************************** 576 val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire) 577 val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr) 578 val last_cycle_bpu_in = RegNext(bpu_in_fire) 579 val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire) 580 581 // read pc and target 582 ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value 583 ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value 584 585 val toIfuReq = Wire(chiselTypeOf(io.toIfu.req)) 586 587 toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr 588 toIfuReq.bits.ftqIdx := ifuPtr 589 toIfuReq.bits.target := update_target(ifuPtr.value) 590 toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value) 591 toIfuReq.bits.fallThruError := false.B 592 593 when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) { 594 toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf) 595 }.elsewhen (last_cycle_to_ifu_fire) { 596 toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last) 597 }.otherwise { 598 toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last) 599 } 600 601 io.toIfu.req <> toIfuReq 602 603 // when fall through is smaller in value than start address, there must be a false hit 604 when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) { 605 when (io.toIfu.req.fire && 606 !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && 607 !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr) 608 ) { 609 entry_hit_status(ifuPtr.value) := h_false_hit 610 XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr) 611 } 612 io.toIfu.req.bits.fallThruAddr := 
toIfuReq.bits.startAddr + (FetchWidth*4).U 613 io.toIfu.req.bits.fallThruError := true.B 614 XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr) 615 } 616 617 val ifu_req_should_be_flushed = 618 io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) || 619 io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx) 620 621 when (io.toIfu.req.fire && !ifu_req_should_be_flushed) { 622 entry_fetch_status(ifuPtr.value) := f_sent 623 } 624 625 626 // ********************************************************************* 627 // **************************** wb from ifu **************************** 628 // ********************************************************************* 629 val pdWb = io.fromIfu.pdWb 630 val pds = pdWb.bits.pd 631 val ifu_wb_valid = pdWb.valid 632 val ifu_wb_idx = pdWb.bits.ftqIdx.value 633 // read ports: commit update 634 val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1)) 635 ftq_pd_mem.io.wen(0) := ifu_wb_valid 636 ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value 637 ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits) 638 639 val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid 640 val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid 641 val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B) 642 val pd_reg = RegEnable(pds, enable = pdWb.valid) 643 val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid) 644 val wb_idx_reg = RegEnable(ifu_wb_idx, enable = pdWb.valid) 645 646 when (ifu_wb_valid) { 647 val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{ 648 case (v, inRange) => v && inRange 649 }) 650 (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{ 651 case (qe, v) => when (v) { qe := c_valid } 652 } 653 } 654 655 ifuWbPtr := ifuWbPtr + ifu_wb_valid 656 657 ftb_entry_mem.io.raddr.head := ifu_wb_idx 658 val has_false_hit = WireInit(false.B) 659 when (RegNext(hit_pd_valid)) { 660 // 
check for false hit 661 val pred_ftb_entry = ftb_entry_mem.io.rdata.head 662 val brSlots = pred_ftb_entry.brSlots 663 val tailSlot = pred_ftb_entry.tailSlot 664 // we check cfis that bpu predicted 665 666 // bpu predicted branches but denied by predecode 667 val br_false_hit = 668 brSlots.map{ 669 s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr) 670 }.reduce(_||_) || 671 (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing && 672 !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr)) 673 674 val jmpOffset = tailSlot.offset 675 val jmp_pd = pd_reg(jmpOffset) 676 val jal_false_hit = pred_ftb_entry.jmpValid && 677 ((pred_ftb_entry.isJal && !(jmp_pd.valid && jmp_pd.isJal)) || 678 (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) || 679 (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) || 680 (pred_ftb_entry.isRet && !(jmp_pd.valid && jmp_pd.isRet)) 681 ) 682 683 has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg 684 XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0)) 685 686 // assert(!has_false_hit) 687 } 688 689 when (has_false_hit) { 690 entry_hit_status(wb_idx_reg) := h_false_hit 691 } 692 693 694 // ********************************************************************** 695 // **************************** backend read **************************** 696 // ********************************************************************** 697 698 // pc reads 699 for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) { 700 ftq_pc_mem.io.raddr(i) := req.ptr.value 701 req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset)) 702 } 703 // target read 704 io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value)) 705 706 // ******************************************************************************* 707 // **************************** redirect from backend **************************** 708 // 
// *******************************************************************************

  // Backend redirect read path.
  // `.init.last` selects the second-to-last read port of each memory. Its address comes from
  // io.fromBackend.stage2Redirect, while the data read out is merged into a local copy of
  // io.fromBackend.stage3Redirect below.
  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  // Local, overridable copy of the stage-3 redirect; its cfiUpdate is filled in below.
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    // Entry recorded as a hit: shift is the population count of the FTB entry's
    // getBrMaskByOffset(offset), plus one (widening add `+&`) when the redirecting instruction
    // is a branch that is neither already saved in the entry nor rejected by newBrCanNotInsert.
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    // The branch goes into history when it is already saved in the entry or can still be inserted.
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    // Entry not recorded as a hit: shift is 1 only for a taken branch; every branch is added to history.
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  // A predecode redirect is raised when the predecode writeback (pdWb) is valid, carries a valid
  // misOffset, and backendFlush is low. All fields not assigned below stay at the zero default.
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  // pc and pd are taken from the slot selected by misOffset within the written-back entry.
  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  // The redirect is registered once before being forwarded towards the BPU; ifuFlush is asserted
  // both in the cycle the redirect is raised and in the cycle its registered copy is valid.
  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  // The first read port (`.head`) of both memories is addressed by the unregistered IFU redirect.
  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  // The read data is merged into the registered copy of the redirect.
  // NOTE(review): this presumes the SRAM returns data one cycle after the address - confirm.
  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  // For a return instruction the target is replaced by the return address held in rasEntry.
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  // Unpacks a redirect into (valid, FTQ entry index, offset inside the entry, taken, mispredicted).
  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  // High in the cycle after stage2Redirect was valid with level flushAfter.
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  // Applies a redirect to the per-entry bookkeeping (cfiIndex_vec, update_target, mispredict_vec).
  //   redirect  - the redirect to apply
  //   isBackend - elaboration-time flag; mispredict_vec is written only when it is true
  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    // A taken CFI located before the recorded one replaces the recorded offset.
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    // A CFI at exactly the recorded offset only refreshes the valid bit.
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    // Not guarded by r_valid here; it is conditioned only by the `when` at the call site.
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  // The backend redirect takes priority over the registered IFU redirect.
  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  // The subset of a redirect needed to reset the pointers and the commit state queue.
  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    // Drives every field of this bundle from `redirect` and returns this bundle.
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  // Order matters: the PriorityMux below selects the first valid source in this order
  // (robFlush, then stage2Redirect, then the IFU redirect).
  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    // True when either of the first two (non-IFU) sources is valid.
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    // All three pointers restart at the entry following the redirected one.
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      // Invalidate every slot after the redirecting one, and that slot too when flushItSelf is set.
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid := stage2Flush

  // commit
  // Every valid ROB commit marks its slot as committed.
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      // commitType 4 / 5 additionally mark offset+1 / offset+2 of the same entry;
      // commitType 6 / 7 additionally mark slot 0 / slot 1 of the following entry.
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  // The backend redirect wins over the registered IFU redirect.
  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  // The entry at commPtr can commit when the IFU writeback pointer has moved past it, no stall
  // is flagged, and every slot of the entry is either invalid or committed.
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  // The last read port (`.last`) of each memory is addressed by commPtr.
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
// Remaining commit-time reads: the last read port of each memory is addressed by commPtr.
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  // do_commit is canCommit delayed by one register stage. The commit_* values below are likewise
  // registered copies of the state indexed by commPtr, so they line up with do_commit.
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  // The recorded CFI only counts if the slot it points at has actually been committed.
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  // A slot is reported as mispredicted only when it is flagged in mispredict_vec and committed.
  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  // Because the next value includes !may_have_stall_from_bpu, the register cannot stay high for
  // two consecutive cycles.
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  // Every update field not assigned explicitly below is left as DontCare.
  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit := commit_hit === h_false_hit
  update.pc := commit_pc_bundle.startAddr
  update.preds.hit := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  // FTBEntryGen receives the committed entry's information and produces the FTB entry that is
  // sent to the BPU, together with the flags and masks copied into `update` below.
  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry := commit_ftb_entry
  ftbEntryGen.pd := commit_pd
  ftbEntryGen.cfiIndex := commit_cfi
  ftbEntryGen.target := commit_target
  ftbEntryGen.hit := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask := ftbEntryGen.mispred_mask
  update.old_entry := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  // Per-slot bit masks over the committing entry.
  val commit_inst_mask = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  // One-hot at jmpOffset, zeroed entirely when jmpInfo is not valid.
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  // Committed control-flow instructions, split into correctly and incorrectly predicted ones.
  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  // Debug-only: one XSDebug line per committed control-flow instruction of the entry.
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    val ghist = commit_spec_meta.ghist.predHist
    // Low 64 bits of the meta word, printed as "cycle" below.
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    // Index and presence of slot i among the branch slots of the generated FTB entry.
    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
      p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${Hexadecimal(ghist)}) " +
      p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
      p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  // Builds a name -> condition map with one counter per possible entry length
  // (1 to PredictWidth+1 instructions) for the given BPU stage response.
  //   resp  - the stage's prediction bundle
  //   stage - prefix used in the counter names
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  // Same length histogram for requests that fire towards the IFU.
  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  // Histogram of the number of committed instructions per committing entry.
  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  // One-hot masks at jmpOffset, each gated by the matching predecode flag.
  val commit_jal_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  // Right/wrong masks split by instruction kind: B = br, J = jal, I = jalr, C = call, R = ret.
  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  // Splits the set bits of `src` by the committing entry's pred_stage value:
  // one counter per BP_STAGES element, named "<name>_stage_<k>" with k starting at 1.
  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map = pred_stage_map(mbpWrongs, "mispredict")
  val br_mispred_stage_map = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map = pred_stage_map(mbpRights, "correct")
  val br_correct_stage_map = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  // Qualifies a condition with "a BPU update is being sent this cycle".
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  // Length histograms of the generated FTB entry, separately for new and for modified entries.
  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  // Occupancy histogram: one counter per possible value of validEntries (0 to FtqSize).
  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" ->( validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  // All named counters gathered in one map; each is handed to XSPerfAccumulate in the loop below.
  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight" -> PopCount(mbpRights),
    "BpWrong" -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit" -> PopCount(ftb_false_hit),
    "ftb_hit" -> PopCount(ftb_hit),
    "ftb_new_entry" -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br" -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp" -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry" -> PopCount(ftb_old_entry),
    "ftb_modified_entry" -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br" -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified" -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full" -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  // The per-predictor checkers below are disabled (commented-out) code, kept as found.
  // def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //     Mux(valid && pd.isBr,
  //       isWrong ^ Mux(ans.hit.asBool,
  //         Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //         !taken),
  //       !taken),
  //     false.B)
  //   }
  // }

  // def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //     Mux(valid && pd.isBr,
  //       isWrong ^ Mux(ans.hit.asBool,
  //         Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //         !taken),
  //       !taken),
  //     false.B)
  //   }
  // }

  // def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //     Mux(valid && pd.isBr,
  //       isWrong ^ (ans.taken.asBool === taken),
  //     false.B)
  //   }
  // }

  // def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //     Mux(valid && (pd.isBr) && ans.hit.asBool,
  //       isWrong ^ (!taken),
  //     false.B)
  //   }
  // }

  // def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //   commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //     case (((valid, pd), ans), taken) =>
  //     Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //       isWrong ^ (ans.target === commitEntry.target),
  //     false.B)
  //   }
  // }

  // val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  // val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  // // btb and ubtb pred jal and jalr as well
  // val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  // val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  // val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  // val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  // val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  // val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  // val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  // val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)

  // Perf-event outputs of this module.
  // NOTE(review): the bundle is built with 22 while the perfEvents Seq below lists 24 entries.
  // `zip` stops at the shorter collection, so if PerfEventsBundle(22) exposes 22 perf_events the
  // last two entries ("ftb_false_hit ", "ftb_hit ") are never connected - confirm this is intended.
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect ", bpu_s2_redirect ),
    ("bpu_s3_redirect ", bpu_s3_redirect ),
    ("bpu_to_ftq_stall ", enq.valid && ~enq.ready ),
    ("mispredictRedirect ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level) ),
    ("predecodeRedirect ", fromIfuRedirect.valid ),
    ("to_ifu_bubble ", io.toIfu.req.ready && !io.toIfu.req.valid ),
    ("from_bpu_real_bubble ", !enq.valid && enq.ready && allowBpuIn ),
    ("BpInstr ", PopCount(mbpInstrs) ),
    ("BpBInstr ", PopCount(mbpBRights | mbpBWrongs) ),
    ("BpRight ", PopCount(mbpRights) ),
    ("BpWrong ", PopCount(mbpWrongs) ),
    ("BpBRight ", PopCount(mbpBRights) ),
    ("BpBWrong ", PopCount(mbpBWrongs) ),
    ("BpJRight ", PopCount(mbpJRights) ),
    ("BpJWrong ", PopCount(mbpJWrongs) ),
    ("BpIRight ", PopCount(mbpIRights) ),
    ("BpIWrong ", PopCount(mbpIWrongs) ),
    ("BpCRight ", PopCount(mbpCRights) ),
    ("BpCWrong ", PopCount(mbpCWrongs) ),
    ("BpRRight ", PopCount(mbpRRights) ),
    ("BpRWrong ", PopCount(mbpRWrongs) ),
    ("ftb_false_hit ", PopCount(ftb_false_hit) ),
    ("ftb_hit ", PopCount(ftb_hit) ),
  )

  // Each connected event is registered once before driving incr_step.
  // perf_name and i are bound by the pattern but not used.
  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}