/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.MemoryOpCategories._
import freechips.rocketchip.tilelink.TLPermissions._
import difftest._
import coupledL2.{AliasKey, DirtyKey, PrefetchKey}
import mem.{AddPipelineReg}
import mem.trace._

class MissReqWoStoreData(implicit p: Parameters) extends DCacheBundle {
  val source = UInt(sourceTypeWidth.W)
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val way_en = UInt(DCacheWays.W)
  val pc = UInt(VAddrBits.W)

  // store
  val full_overwrite = Bool()

  // which word does amo work on?
  val word_idx = UInt(log2Up(blockWords).W)
  val amo_data = UInt(DataBits.W)
  val amo_mask = UInt((DataBits / 8).W)

  val req_coh = new ClientMetadata
  val replace_coh = new ClientMetadata
  val replace_tag = UInt(tagBits.W)
  val id = UInt(reqIdWidth.W)

  // For now, miss queue entry req is actually valid when req.valid && !cancel
  // * req.valid is fast to generate
  // * cancel is slow to generate, it will not be used until the last moment
  //
  // cancel may come from the following sources:
  // 1. miss req blocked by writeback queue:
  //      a writeback req of the same address is in progress
  // 2. pmp check failed
  val cancel = Bool() // cancel is slow to generate, it will cancel missreq.valid

  // Req source decode
  // Note that req source is NOT cmd type
  // For instance, a req which isFromPrefetch may have R or W cmd
  def isFromLoad = source === LOAD_SOURCE.U
  def isFromStore = source === STORE_SOURCE.U
  def isFromAMO = source === AMO_SOURCE.U
  def isFromPrefetch = source >= DCACHE_PREFETCH_SOURCE.U
  def hit = req_coh.isValid()
}

class MissReqStoreData(implicit p: Parameters) extends DCacheBundle {
  // store data and store mask will be written to miss queue entry
  // 1 cycle after req.fire() and meta write
  val store_data = UInt((cfg.blockBytes * 8).W)
  val store_mask = UInt(cfg.blockBytes.W)
}

class MissReq(implicit p: Parameters) extends MissReqWoStoreData {
  // store data and store mask will be written to miss queue entry
  // 1 cycle after req.fire() and meta write
  val store_data = UInt((cfg.blockBytes * 8).W)
  val store_mask = UInt(cfg.blockBytes.W)

  def toMissReqStoreData(): MissReqStoreData = {
    val out = Wire(new MissReqStoreData)
    out.store_data := store_data
    out.store_mask := store_mask
    out
  }

  def toMissReqWoStoreData(): MissReqWoStoreData = {
    val out = Wire(new MissReqWoStoreData)
    out.source := source
    out.cmd := cmd
    out.addr := addr
    out.vaddr := vaddr
    out.way_en := way_en
    out.full_overwrite := full_overwrite
    out.word_idx := word_idx
    out.amo_data := amo_data
    out.amo_mask := amo_mask
    out.req_coh := req_coh
    out.replace_coh := replace_coh
    out.replace_tag := replace_tag
    out.id := id
    out.cancel := cancel
    out.pc := pc
    out
  }
}

class MissResp(implicit p: Parameters) extends DCacheBundle {
  val id = UInt(log2Up(cfg.nMissEntries).W)
  // cache miss request is handled by miss queue, either merged or newly allocated
  val handled = Bool()
  // cache req missed, merged into one of miss queue entries
  // i.e. !miss_merged means this access is the first miss for this cacheline
  val merged = Bool()
  val repl_way_en = UInt(DCacheWays.W)
}


/**
  * miss queue enq logic: enq is now split into 2 cycles
  *  +---------------------------------------------------------------------+    pipeline reg     +-------------------------+
  *  +       s0: enq source arbiter, judge mshr alloc or merge             +      +-------+      + s1: real alloc or merge +
  *  +                      +-----+          primary_fire?     ->          +      | alloc |      +                         +
  *  + mainpipe  -> req0 -> |     |          secondary_fire?   ->          +      | merge |      +                         +
  *  + loadpipe0 -> req1 -> | arb | -> req                     ->          +  ->  | req   |  ->  +                         +
  *  + loadpipe1 -> req2 -> |     |          mshr id           ->          +      | id    |      +                         +
  *  +                      +-----+                                        +      +-------+      +                         +
  *  +---------------------------------------------------------------------+                     +-------------------------+
  */
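// The module below is only an illustrative sketch (an assumption, not part of the
// design in this file): it shows the generic "pipeline reg" pattern the s0/s1 split
// above relies on. A Decoupled request is captured into a one-entry register stage,
// so the slow alloc/merge decision can be made one cycle later, off the enq critical path.
class EnqPipelineRegSketch[T <: Data](gen: T) extends Module {
  val io = IO(new Bundle {
    val in  = Flipped(DecoupledIO(gen))
    val out = DecoupledIO(gen)
  })
  val valid = RegInit(false.B)
  val data  = Reg(gen)
  // accept a new beat whenever the slot is empty or is being drained this cycle
  io.in.ready := !valid || io.out.ready
  when (io.in.fire) {
    valid := true.B
    data  := io.in.bits
  }.elsewhen (io.out.fire) {
    valid := false.B
  }
  io.out.valid := valid
  io.out.bits  := data
}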
// a pipeline reg between MissReq and MissEntry
class MissReqPipeRegBundle(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheBundle {
  val req = new MissReq
  // this request is about to merge to an existing mshr
  val merge = Bool()
  // this request is about to allocate a new mshr
  val alloc = Bool()
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W)

  def reg_valid(): Bool = {
    (merge || alloc)
  }

  def reject_req(new_req: MissReq): Bool = {
    val set_match = addr_to_dcache_set(req.vaddr) === addr_to_dcache_set(new_req.vaddr)
    val way_match = req.way_en === new_req.way_en
    reg_valid() && set_match && way_match
  }

  def merge_req(new_req: MissReq): Bool = {
    val block_match = get_block(req.addr) === get_block(new_req.addr)
    val merge_load = (req.isFromLoad || req.isFromStore || req.isFromPrefetch) && new_req.isFromLoad
    // merging a store into a store is disabled; the sbuffer should avoid this situation,
    // as stores to the same address must preserve their program order to match the memory model
    val merge_store = (req.isFromLoad || req.isFromPrefetch) && new_req.isFromStore
    Mux(alloc, block_match && (merge_load || merge_store), false.B)
  }
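  // Illustrative software model of the merge rule above (an assumption, for clarity
  // only; it is not used by the hardware). A new load may merge into any kind of
  // pending miss for the same block, while a new store may merge only into a
  // load/prefetch miss -- never into another store -- and merging is only considered
  // while this pipe reg is allocating a new mshr.
  def mergePolicyModel(pending: String, incoming: String): Boolean = incoming match {
    case "load"  => Seq("load", "store", "prefetch").contains(pending)
    case "store" => Seq("load", "prefetch").contains(pending)
    case _       => false // prefetch / AMO requests are not merged here
  }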

  // send out acquire as soon as possible
  // if a new store miss req is about to merge into this pipe reg, don't send acquire now
  def can_send_acquire(valid: Bool, new_req: MissReq): Bool = {
    alloc && (!valid || !merge_req(new_req) || !new_req.isFromStore)
  }

  def get_acquire(l2_pf_store_only: Bool): TLBundleA = {
    val acquire = Wire(new TLBundleA(edge.bundle))
    val grow_param = req.req_coh.onAccess(req.cmd)._2
    val acquireBlock = edge.AcquireBlock(
      fromSource = mshr_id,
      toAddress = get_block_addr(req.addr),
      lgSize = (log2Up(cfg.blockBytes)).U,
      growPermissions = grow_param
    )._2
    val acquirePerm = edge.AcquirePerm(
      fromSource = mshr_id,
      toAddress = get_block_addr(req.addr),
      lgSize = (log2Up(cfg.blockBytes)).U,
      growPermissions = grow_param
    )._2
    acquire := Mux(req.full_overwrite, acquirePerm, acquireBlock)
    // resolve cache alias by L2
    acquire.user.lift(AliasKey).foreach( _ := req.vaddr(13, 12))
    // trigger prefetch
    acquire.user.lift(PrefetchKey).foreach(_ := Mux(l2_pf_store_only, req.isFromStore, true.B))
    // req source
    when(req.isFromLoad) {
      acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPULoadData.id.U)
    }.elsewhen(req.isFromStore) {
      acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUStoreData.id.U)
    }.elsewhen(req.isFromAMO) {
      acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUAtomicData.id.U)
    }.otherwise {
      acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.L1DataPrefetch.id.U)
    }

    acquire
  }
}

class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val hartId = Input(UInt(8.W))
    // MSHR ID
    val id = Input(UInt(log2Up(cfg.nMissEntries).W))
    // client requests
    // MSHR update request, MSHR state and addr will be updated when req.fire()
    val req = Flipped(ValidIO(new MissReqWoStoreData))
    // pipeline reg
    val miss_req_pipe_reg = Input(new MissReqPipeRegBundle(edge))
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_ready = Output(Bool())
    // this entry is busy and it can not merge the new req
    val secondary_reject = Output(Bool())
    // way selected for replacing, used to support plru update
    val repl_way_en = Output(UInt(DCacheWays.W))

    // bus
    val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))

    // send refill info to load queue
    val refill_to_ldq = ValidIO(new Refill)

    // refill pipe
    val refill_pipe_req = DecoupledIO(new RefillPipeReq)
    val refill_pipe_resp = Input(Bool())

    // replace pipe
    val replace_pipe_req = DecoupledIO(new MainPipeReq)
    val replace_pipe_resp = Input(Bool())

    // main pipe: amo miss
    val main_pipe_req = DecoupledIO(new MainPipeReq)
    val main_pipe_resp = Input(Bool())

    val block_addr = ValidIO(UInt(PAddrBits.W))

    val debug_early_replace = ValidIO(new Bundle() {
      // info about the block that has been replaced
      val idx = UInt(idxBits.W) // vaddr
      val tag = UInt(tagBits.W) // paddr
    })

    val req_handled_by_this_entry = Output(Bool())

    val forwardInfo = Output(new MissEntryForwardIO)
    val l2_pf_store_only = Input(Bool())

    // whether the pipeline reg has sent out an acquire
    val acquire_fired_by_pipe_reg = Input(Bool())

    val perf_pending_prefetch = Output(Bool())
    val perf_pending_normal = Output(Bool())

    val rob_head_query = new DCacheBundle {
      val vaddr = Input(UInt(VAddrBits.W))
      val query_valid = Input(Bool())

      val resp = Output(Bool())

      def hit(e_vaddr: UInt): Bool = {
        require(e_vaddr.getWidth == VAddrBits)
        query_valid && vaddr(VAddrBits - 1, DCacheLineOffset) === e_vaddr(VAddrBits - 1, DCacheLineOffset)
      }
    }

    val latency_monitor = new DCacheBundle {
      val load_miss_refilling = Output(Bool())
      val store_miss_refilling = Output(Bool())
      val amo_miss_refilling = Output(Bool())
      val pf_miss_refilling = Output(Bool())
    }
  })

  assert(!RegNext(io.primary_valid && !io.primary_ready))

  val req = Reg(new MissReqWoStoreData)
  val req_primary_fire = Reg(new MissReqWoStoreData) // for perf use
  val req_store_mask = Reg(UInt(cfg.blockBytes.W))
  val req_valid = RegInit(false.B)
  val set = addr_to_dcache_set(req.vaddr)

  val miss_req_pipe_reg_bits = io.miss_req_pipe_reg.req

  val input_req_is_prefetch = isPrefetch(miss_req_pipe_reg_bits.cmd)

  val s_acquire = RegInit(true.B)
  val s_grantack = RegInit(true.B)
  val s_replace_req = RegInit(true.B)
  val s_refill = RegInit(true.B)
  val s_mainpipe_req = RegInit(true.B)

  val w_grantfirst = RegInit(true.B)
  val w_grantlast = RegInit(true.B)
  val w_replace_resp = RegInit(true.B)
  val w_refill_resp = RegInit(true.B)
  val w_mainpipe_resp = RegInit(true.B)

  val release_entry = s_grantack && w_refill_resp && w_mainpipe_resp

  val acquire_not_sent = !s_acquire && !io.mem_acquire.ready
  val data_not_refilled = !w_grantfirst

  val error = RegInit(false.B)
  val prefetch = RegInit(false.B)
  val access = RegInit(false.B)

  val should_refill_data_reg = Reg(Bool())
  val should_refill_data = WireInit(should_refill_data_reg)

  // val full_overwrite = req.isFromStore && req_store_mask.andR
  val full_overwrite = Reg(Bool())

  val (_, _, refill_done, refill_count) = edge.count(io.mem_grant)
  val grant_param = Reg(UInt(TLPermissions.bdWidth.W))

  // refill data with store data, this reg will be used to store:
  // 1. store data (if needed), before l2 refill data
  // 2. store data and l2 refill data merged result (i.e. the new cacheline that will be written to the data array)
  val refill_and_store_data = Reg(Vec(blockRows, UInt(rowBits.W)))
  // raw data refilled to l1 by l2
  val refill_data_raw = Reg(Vec(blockBytes/beatBytes, UInt(beatBits.W)))

  // allocate current miss queue entry for a miss req
  val primary_fire = WireInit(io.req.valid && io.primary_ready && io.primary_valid && !io.req.bits.cancel)
  // merge miss req to current miss queue entry
  val secondary_fire = WireInit(io.req.valid && io.secondary_ready && !io.req.bits.cancel)

  val req_handled_by_this_entry = primary_fire || secondary_fire

  // for perf use
  val secondary_fired = RegInit(false.B)

  io.perf_pending_prefetch := req_valid && prefetch && !secondary_fired
  io.perf_pending_normal := req_valid && (!prefetch || secondary_fired)

  io.rob_head_query.resp := io.rob_head_query.hit(req.vaddr) && req_valid

  io.req_handled_by_this_entry := req_handled_by_this_entry

  when (release_entry && req_valid) {
    req_valid := false.B
  }

  when (io.miss_req_pipe_reg.alloc) {
    assert(RegNext(primary_fire), "after 1 cycle of primary_fire, entry will be allocated")
    req_valid := true.B

    req := miss_req_pipe_reg_bits.toMissReqWoStoreData()
    req_primary_fire := miss_req_pipe_reg_bits.toMissReqWoStoreData()
    req.addr := get_block_addr(miss_req_pipe_reg_bits.addr)

    s_acquire := io.acquire_fired_by_pipe_reg
    s_grantack := false.B

    w_grantfirst := false.B
    w_grantlast := false.B

    when(miss_req_pipe_reg_bits.isFromStore) {
      req_store_mask := miss_req_pipe_reg_bits.store_mask
      for (i <- 0 until blockRows) {
        refill_and_store_data(i) := miss_req_pipe_reg_bits.store_data(rowBits * (i + 1) - 1, rowBits * i)
      }
    }
    full_overwrite := miss_req_pipe_reg_bits.isFromStore && miss_req_pipe_reg_bits.full_overwrite

    when (!miss_req_pipe_reg_bits.isFromAMO) {
      s_refill := false.B
      w_refill_resp := false.B
    }

    when (!miss_req_pipe_reg_bits.hit && miss_req_pipe_reg_bits.replace_coh.isValid() && !miss_req_pipe_reg_bits.isFromAMO) {
      s_replace_req := false.B
      w_replace_resp := false.B
    }

    when (miss_req_pipe_reg_bits.isFromAMO) {
      s_mainpipe_req := false.B
      w_mainpipe_resp := false.B
    }

    should_refill_data_reg := miss_req_pipe_reg_bits.isFromLoad
    error := false.B
    prefetch := input_req_is_prefetch
    access := false.B
    secondary_fired := false.B
  }
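  // Hedged worked example (an assumption for illustration, not used by the hardware):
  // how a flat store_data vector is chopped into per-row chunks, mirroring the
  // store_data(rowBits * (i + 1) - 1, rowBits * i) slicing above. With 64-byte blocks
  // and 64-bit rows there would be 8 rows per block.
  def sliceIntoRowsModel(data: BigInt, rowBitsArg: Int, numRows: Int): Seq[BigInt] =
    (0 until numRows).map(i => (data >> (rowBitsArg * i)) & ((BigInt(1) << rowBitsArg) - 1))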

  when (io.miss_req_pipe_reg.merge) {
    assert(RegNext(secondary_fire) || RegNext(RegNext(primary_fire)), "after 1 cycle of secondary_fire or 2 cycles of primary_fire, entry will be merged")
    assert(miss_req_pipe_reg_bits.req_coh.state <= req.req_coh.state || (prefetch && !access))
    assert(!(miss_req_pipe_reg_bits.isFromAMO || req.isFromAMO))
    // use the most up-to-date meta
    req.req_coh := miss_req_pipe_reg_bits.req_coh

    when (miss_req_pipe_reg_bits.isFromStore) {
      req := miss_req_pipe_reg_bits
      req.addr := get_block_addr(miss_req_pipe_reg_bits.addr)
      // vaddr??
      req.vaddr := req.vaddr
      req.way_en := req.way_en
      req.replace_coh := req.replace_coh
      req.replace_tag := req.replace_tag
      req_store_mask := miss_req_pipe_reg_bits.store_mask
      for (i <- 0 until blockRows) {
        refill_and_store_data(i) := miss_req_pipe_reg_bits.store_data(rowBits * (i + 1) - 1, rowBits * i)
      }
      full_overwrite := miss_req_pipe_reg_bits.isFromStore && miss_req_pipe_reg_bits.full_overwrite
    }

    should_refill_data := should_refill_data_reg || miss_req_pipe_reg_bits.isFromLoad
    should_refill_data_reg := should_refill_data
    when (!input_req_is_prefetch) {
      access := true.B // when merge non-prefetch req, set access bit
    }
    secondary_fired := true.B
  }

  when (io.mem_acquire.fire()) {
    s_acquire := true.B
  }

  // merge data refilled by l2 and store data, update miss queue entry, gen refill_req
  val new_data = Wire(Vec(blockRows, UInt(rowBits.W)))
  val new_mask = Wire(Vec(blockRows, UInt(rowBytes.W)))
  // merge refilled data and store data (if needed)
  def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(8, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
  for (i <- 0 until blockRows) {
    // new_data(i) := req.store_data(rowBits * (i + 1) - 1, rowBits * i)
    new_data(i) := refill_and_store_data(i)
    // we only need to merge data for Store
    new_mask(i) := Mux(req.isFromStore, req_store_mask(rowBytes * (i + 1) - 1, rowBytes * i), 0.U)
  }
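  // Software reference model of mergePutData above (an assumption, for illustration
  // only; not used by the hardware): each set bit of the byte mask takes the
  // corresponding byte from the new data, every other byte keeps the old data, just
  // like FillInterleaved(8, wmask) does in hardware.
  // Example: old = 0xAABB, new = 0x1122, wmask = 0b01  =>  0xAA22 (only byte 0 overwritten).
  def mergePutDataModel(oldData: BigInt, newData: BigInt, wmask: BigInt, numBytes: Int): BigInt =
    (0 until numBytes).foldLeft(BigInt(0)) { case (acc, i) =>
      val byte = if (wmask.testBit(i)) (newData >> (8 * i)) & 0xff else (oldData >> (8 * i)) & 0xff
      acc | (byte << (8 * i))
    }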

  val hasData = RegInit(true.B)
  val isDirty = RegInit(false.B)
  when (io.mem_grant.fire()) {
    w_grantfirst := true.B
    grant_param := io.mem_grant.bits.param
    when (edge.hasData(io.mem_grant.bits)) {
      // GrantData
      for (i <- 0 until beatRows) {
        val idx = (refill_count << log2Floor(beatRows)) + i.U
        val grant_row = io.mem_grant.bits.data(rowBits * (i + 1) - 1, rowBits * i)
        refill_and_store_data(idx) := mergePutData(grant_row, new_data(idx), new_mask(idx))
      }
      w_grantlast := w_grantlast || refill_done
      hasData := true.B
    }.otherwise {
      // Grant
      assert(full_overwrite)
      for (i <- 0 until blockRows) {
        refill_and_store_data(i) := new_data(i)
      }
      w_grantlast := true.B
      hasData := false.B
    }

    error := io.mem_grant.bits.denied || io.mem_grant.bits.corrupt || error

    refill_data_raw(refill_count) := io.mem_grant.bits.data
    isDirty := io.mem_grant.bits.echo.lift(DirtyKey).getOrElse(false.B)
  }
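  // Hedged worked example (assumed sizes, for illustration only: 64-byte block,
  // 256-bit bus beat, 64-bit rows => beatRows = 4 and 2 beats per block): which rows
  // a refill beat lands in, mirroring the (refill_count << log2Floor(beatRows)) + i
  // index above.  rowsOfBeatModel(1, 4) == Seq(4, 5, 6, 7), i.e. the second beat
  // fills rows 4..7 of refill_and_store_data.
  def rowsOfBeatModel(beatIdx: Int, rowsPerBeat: Int): Seq[Int] =
    (0 until rowsPerBeat).map(i => beatIdx * rowsPerBeat + i)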

  when (io.mem_finish.fire()) {
    s_grantack := true.B
  }

  when (io.replace_pipe_req.fire()) {
    s_replace_req := true.B
  }

  when (io.replace_pipe_resp) {
    w_replace_resp := true.B
  }

  when (io.refill_pipe_req.fire()) {
    s_refill := true.B
  }

  when (io.refill_pipe_resp) {
    w_refill_resp := true.B
  }

  when (io.main_pipe_req.fire()) {
    s_mainpipe_req := true.B
  }

  when (io.main_pipe_resp) {
    w_mainpipe_resp := true.B
  }

  def before_req_sent_can_merge(new_req: MissReqWoStoreData): Bool = {
    acquire_not_sent && (req.isFromLoad || req.isFromPrefetch) && (new_req.isFromLoad || new_req.isFromStore)
  }

  def before_data_refill_can_merge(new_req: MissReqWoStoreData): Bool = {
    data_not_refilled && (req.isFromLoad || req.isFromStore || req.isFromPrefetch) && new_req.isFromLoad
  }

  // Note that late prefetch will be ignored

  def should_merge(new_req: MissReqWoStoreData): Bool = {
    val block_match = get_block(req.addr) === get_block(new_req.addr)
    block_match &&
    (
      before_req_sent_can_merge(new_req) ||
      before_data_refill_can_merge(new_req)
    )
  }

  // a store can be merged before io.mem_acquire.fire()
  // a store cannot be merged in the cycle that io.mem_acquire.fire()
  // a load can be merged before io.mem_grant.fire()
  //
  // TODO: merge store if possible? mem_acquire may need to be re-issued,
  // but sbuffer entry can be freed
  def should_reject(new_req: MissReqWoStoreData): Bool = {
    val block_match = get_block(req.addr) === get_block(new_req.addr)
    val set_match = set === addr_to_dcache_set(new_req.vaddr)

    req_valid &&
      Mux(
        block_match,
        !before_req_sent_can_merge(new_req) &&
          !before_data_refill_can_merge(new_req),
        set_match && new_req.way_en === req.way_en
      )
  }

  // req_valid will be updated 1 cycle after primary_fire, so next cycle, this entry cannot accept a new req
  io.primary_ready := !req_valid && !RegNext(primary_fire)
  io.secondary_ready := should_merge(io.req.bits)
  io.secondary_reject := should_reject(io.req.bits)
  io.repl_way_en := req.way_en

  // should not allocate, merge or reject at the same time
  assert(RegNext(PopCount(Seq(io.primary_ready, io.secondary_ready, io.secondary_reject)) <= 1.U))

  val refill_data_splited = WireInit(VecInit(Seq.tabulate(cfg.blockBytes * 8 / l1BusDataWidth)(i => {
    val data = refill_and_store_data.asUInt
    data((i + 1) * l1BusDataWidth - 1, i * l1BusDataWidth)
  })))
  // when all granted data is ready, wake up lq's miss load
  io.refill_to_ldq.valid := RegNext(!w_grantlast && io.mem_grant.fire())
  io.refill_to_ldq.bits.addr := RegNext(req.addr + (refill_count << refillOffBits))
  io.refill_to_ldq.bits.data := refill_data_splited(RegNext(refill_count))
  io.refill_to_ldq.bits.error := RegNext(io.mem_grant.bits.corrupt || io.mem_grant.bits.denied)
  io.refill_to_ldq.bits.refill_done := RegNext(refill_done && io.mem_grant.fire())
  io.refill_to_ldq.bits.hasdata := hasData
  io.refill_to_ldq.bits.data_raw := refill_data_raw.asUInt
  io.refill_to_ldq.bits.id := io.id

  // if the entry has a pending merge req, wait for it
  // Note: for now, only wait for store, because a store may acquire T
  // TODO: support prefetch
  io.mem_acquire.valid := !s_acquire && !(io.miss_req_pipe_reg.merge && miss_req_pipe_reg_bits.isFromStore)
  val grow_param = req.req_coh.onAccess(req.cmd)._2
  val acquireBlock = edge.AcquireBlock(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = (log2Up(cfg.blockBytes)).U,
    growPermissions = grow_param
  )._2
  val acquirePerm = edge.AcquirePerm(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = (log2Up(cfg.blockBytes)).U,
    growPermissions = grow_param
  )._2
  io.mem_acquire.bits := Mux(full_overwrite, acquirePerm, acquireBlock)
  // resolve cache alias by L2
  io.mem_acquire.bits.user.lift(AliasKey).foreach( _ := req.vaddr(13, 12))
  // trigger prefetch
  io.mem_acquire.bits.user.lift(PrefetchKey).foreach(_ := Mux(io.l2_pf_store_only, req.isFromStore, true.B))
  // req source
  when(prefetch && !secondary_fired) {
    io.mem_acquire.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.L1DataPrefetch.id.U)
  }.otherwise {
    when(req.isFromStore) {
      io.mem_acquire.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUStoreData.id.U)
    }.elsewhen(req.isFromLoad) {
      io.mem_acquire.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPULoadData.id.U)
    }.elsewhen(req.isFromAMO) {
      io.mem_acquire.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUAtomicData.id.U)
    }.otherwise {
      io.mem_acquire.bits.user.lift(ReqSourceKey).foreach(_ := MemReqSource.L1DataPrefetch.id.U)
    }
  }
  require(nSets <= 256)

  io.mem_grant.ready := !w_grantlast && s_acquire

  val grantack = RegEnable(edge.GrantAck(io.mem_grant.bits), io.mem_grant.fire())
  assert(RegNext(!io.mem_grant.fire() || edge.isRequest(io.mem_grant.bits)))
  io.mem_finish.valid := !s_grantack && w_grantfirst
  io.mem_finish.bits := grantack

  io.replace_pipe_req.valid := !s_replace_req
  val replace = io.replace_pipe_req.bits
  replace := DontCare
  replace.miss := false.B
  replace.miss_id := io.id
  replace.miss_dirty := false.B
  replace.probe := false.B
  replace.probe_need_data := false.B
  replace.source := LOAD_SOURCE.U
  replace.vaddr := req.vaddr // only untag bits are needed
  replace.addr := Cat(req.replace_tag, 0.U(pgUntagBits.W)) // only tag bits are needed
  replace.store_mask := 0.U
  replace.replace := true.B
  replace.replace_way_en := req.way_en
  replace.error := false.B

  io.refill_pipe_req.valid := !s_refill && w_replace_resp && w_grantlast
  val refill = io.refill_pipe_req.bits
  refill.source := req.source
  refill.vaddr := req.vaddr
  refill.addr := req.addr
  refill.way_en := req.way_en
  refill.wmask := Mux(
    hasData || req.isFromLoad,
    ~0.U(DCacheBanks.W),
    VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, req_store_mask).orR)).asUInt
  )
  refill.data := refill_and_store_data.asTypeOf((new RefillPipeReq).data)
  refill.miss_id := io.id
  refill.id := req.id
  def missCohGen(cmd: UInt, param: UInt, dirty: Bool) = {
    val c = categorize(cmd)
    MuxLookup(Cat(c, param, dirty), Nothing, Seq(
      //(effect param) -> (next)
      Cat(rd, toB, false.B) -> Branch,
      Cat(rd, toB, true.B)  -> Branch,
      Cat(rd, toT, false.B) -> Trunk,
      Cat(rd, toT, true.B)  -> Dirty,
      Cat(wi, toT, false.B) -> Trunk,
      Cat(wi, toT, true.B)  -> Dirty,
      Cat(wr, toT, false.B) -> Dirty,
      Cat(wr, toT, true.B)  -> Dirty))
  }
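  // Illustrative software model of the table above (an assumption, for clarity only;
  // not used by the hardware). Given the request category, the permission granted by
  // L2 and the Dirty hint, it names the new coherence state; combinations missing from
  // the hardware table fall back to Nothing and are not expected for a miss refill.
  def missCohGenModel(cat: String, perm: String, dirty: Boolean): String = (cat, perm, dirty) match {
    case ("rd", "toB", _)     => "Branch" // load miss granted a shared copy
    case ("rd", "toT", false) => "Trunk"  // exclusive clean copy
    case ("rd", "toT", true)  => "Dirty"  // L2 handed over a dirty copy
    case ("wi", "toT", d)     => if (d) "Dirty" else "Trunk"
    case ("wr", "toT", _)     => "Dirty"  // the pending store will dirty the line anyway
    case _                    => "Nothing"
  }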
  refill.meta.coh := ClientMetadata(missCohGen(req.cmd, grant_param, isDirty))
  refill.error := error
  refill.prefetch := prefetch
  refill.access := access
  refill.alias := req.vaddr(13, 12) // TODO

  io.main_pipe_req.valid := !s_mainpipe_req && w_grantlast
  io.main_pipe_req.bits := DontCare
  io.main_pipe_req.bits.miss := true.B
  io.main_pipe_req.bits.miss_id := io.id
  io.main_pipe_req.bits.miss_param := grant_param
  io.main_pipe_req.bits.miss_dirty := isDirty
  io.main_pipe_req.bits.miss_way_en := req.way_en
  io.main_pipe_req.bits.probe := false.B
  io.main_pipe_req.bits.source := req.source
  io.main_pipe_req.bits.cmd := req.cmd
  io.main_pipe_req.bits.vaddr := req.vaddr
  io.main_pipe_req.bits.addr := req.addr
  io.main_pipe_req.bits.store_data := refill_and_store_data.asUInt
  io.main_pipe_req.bits.store_mask := ~0.U(blockBytes.W)
  io.main_pipe_req.bits.word_idx := req.word_idx
  io.main_pipe_req.bits.amo_data := req.amo_data
  io.main_pipe_req.bits.amo_mask := req.amo_mask
  io.main_pipe_req.bits.error := error
  io.main_pipe_req.bits.id := req.id

  io.block_addr.valid := req_valid && w_grantlast && !w_refill_resp
  io.block_addr.bits := req.addr

  io.debug_early_replace.valid := BoolStopWatch(io.replace_pipe_resp, io.refill_pipe_req.fire())
  io.debug_early_replace.bits.idx := addr_to_dcache_set(req.vaddr)
  io.debug_early_replace.bits.tag := req.replace_tag

  io.forwardInfo.apply(req_valid, req.addr, refill_data_raw, w_grantfirst, w_grantlast)

  // refill latency monitor
  io.latency_monitor.load_miss_refilling  := req_valid && req_primary_fire.isFromLoad     && BoolStopWatch(io.mem_acquire.fire, io.mem_grant.fire && !refill_done, true)
  io.latency_monitor.store_miss_refilling := req_valid && req_primary_fire.isFromStore    && BoolStopWatch(io.mem_acquire.fire, io.mem_grant.fire && !refill_done, true)
  io.latency_monitor.amo_miss_refilling   := req_valid && req_primary_fire.isFromAMO      && BoolStopWatch(io.mem_acquire.fire, io.mem_grant.fire && !refill_done, true)
  io.latency_monitor.pf_miss_refilling    := req_valid && req_primary_fire.isFromPrefetch && BoolStopWatch(io.mem_acquire.fire, io.mem_grant.fire && !refill_done, true)

  XSPerfAccumulate("miss_req_primary", primary_fire)
  XSPerfAccumulate("miss_req_merged", secondary_fire)
  XSPerfAccumulate("load_miss_penalty_to_use",
    should_refill_data &&
      BoolStopWatch(primary_fire, io.refill_to_ldq.valid, true)
  )
  XSPerfAccumulate("main_pipe_penalty", BoolStopWatch(io.main_pipe_req.fire(), io.main_pipe_resp))
  XSPerfAccumulate("penalty_blocked_by_channel_A", io.mem_acquire.valid && !io.mem_acquire.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", s_acquire && !w_grantlast && !io.mem_grant.valid)
  XSPerfAccumulate("penalty_waiting_for_channel_E", io.mem_finish.valid && !io.mem_finish.ready)
  XSPerfAccumulate("penalty_from_grant_to_refill", !w_refill_resp && w_grantlast)
  XSPerfAccumulate("prefetch_req_primary", primary_fire && io.req.bits.source === DCACHE_PREFETCH_SOURCE.U)
  XSPerfAccumulate("prefetch_req_merged", secondary_fire && io.req.bits.source === DCACHE_PREFETCH_SOURCE.U)

  val (mshr_penalty_sample, mshr_penalty) = TransactionLatencyCounter(RegNext(RegNext(primary_fire)), release_entry)
  XSPerfHistogram("miss_penalty", mshr_penalty, mshr_penalty_sample, 0, 20, 1, true, true)
  XSPerfHistogram("miss_penalty", mshr_penalty, mshr_penalty_sample, 20, 100, 10, true, false)

  val load_miss_begin = primary_fire && io.req.bits.isFromLoad
  val refill_finished = RegNext(!w_grantlast && refill_done) && should_refill_data
  val (load_miss_penalty_sample, load_miss_penalty) = TransactionLatencyCounter(load_miss_begin, refill_finished) // not real refill finish time
  XSPerfHistogram("load_miss_penalty_to_use", load_miss_penalty, load_miss_penalty_sample, 0, 20, 1, true, true)
  XSPerfHistogram("load_miss_penalty_to_use", load_miss_penalty, load_miss_penalty_sample, 20, 100, 10, true, false)

  val (a_to_d_penalty_sample, a_to_d_penalty) = TransactionLatencyCounter(io.mem_acquire.fire(), io.mem_grant.fire() && refill_done)
  XSPerfHistogram("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 0, 20, 1, true, true)
  XSPerfHistogram("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 20, 100, 10, true, false)
}
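// The object below is only an illustrative sketch (an assumption; the real utility is
// utils.BoolStopWatch and may differ in detail): the latency monitors and penalty
// counters above rely on a flag that goes high when `start` fires and low again when
// `stop` fires, so it stays high for the whole interval being measured.
object BoolStopWatchSketch {
  def apply(start: Bool, stop: Bool, startHighPriority: Boolean = false): Bool = {
    val watching = RegInit(false.B)
    if (startHighPriority) {
      when (stop)  { watching := false.B }
      when (start) { watching := true.B }  // start wins if both fire in the same cycle
    } else {
      when (start) { watching := true.B }
      when (stop)  { watching := false.B } // stop wins if both fire in the same cycle
    }
    watching
  }
}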

class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
  val io = IO(new Bundle {
    val hartId = Input(UInt(8.W))
    val req = Flipped(DecoupledIO(new MissReq))
    val resp = Output(new MissResp)
    val refill_to_ldq = ValidIO(new Refill)

    val mem_acquire = DecoupledIO(new TLBundleA(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val mem_finish = DecoupledIO(new TLBundleE(edge.bundle))

    val refill_pipe_req = DecoupledIO(new RefillPipeReq)
    val refill_pipe_req_dup = Vec(nDupStatus, DecoupledIO(new RefillPipeReqCtrl))
    val refill_pipe_resp = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))

    val replace_pipe_req = DecoupledIO(new MainPipeReq)
    val replace_pipe_resp = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))

    val main_pipe_req = DecoupledIO(new MainPipeReq)
    val main_pipe_resp = Flipped(ValidIO(new AtomicsResp))

    // block probe
    val probe_addr = Input(UInt(PAddrBits.W))
    val probe_block = Output(Bool())

    val full = Output(Bool())

    // only for performance counter
    // This is valid when an mshr has finished replacing a block (w_replace_resp),
    // but hasn't received Grant from L2 (!w_grantlast)
    val debug_early_replace = Vec(cfg.nMissEntries, ValidIO(new Bundle() {
      // info about the block that has been replaced
      val idx = UInt(idxBits.W) // vaddr
      val tag = UInt(tagBits.W) // paddr
    }))

    // forward missqueue
    val forward = Vec(LoadPipelineWidth, new LduToMissqueueForwardIO)
    val l2_pf_store_only = Input(Bool())

    val mq_enq_cancel = Output(Bool())
  })

  // 128KBL1: FIXME: provide vaddr for l2

  val entries = Seq.fill(cfg.nMissEntries)(Module(new MissEntry(edge)))

  val miss_req_pipe_reg = RegInit(0.U.asTypeOf(new MissReqPipeRegBundle(edge)))
  val acquire_from_pipereg = Wire(chiselTypeOf(io.mem_acquire))

  val primary_ready_vec = entries.map(_.io.primary_ready)
  val secondary_ready_vec = entries.map(_.io.secondary_ready)
  val secondary_reject_vec = entries.map(_.io.secondary_reject)
  val probe_block_vec = entries.map { case e => e.io.block_addr.valid && e.io.block_addr.bits === io.probe_addr }

  val merge = Cat(secondary_ready_vec ++ Seq(miss_req_pipe_reg.merge_req(io.req.bits))).orR
  val reject = Cat(secondary_reject_vec ++ Seq(miss_req_pipe_reg.reject_req(io.req.bits))).orR
  val alloc = !reject && !merge && Cat(primary_ready_vec).orR
  val accept = alloc || merge

  val req_mshr_handled_vec = entries.map(_.io.req_handled_by_this_entry)
  // merged to pipeline reg
  val req_pipeline_reg_handled = miss_req_pipe_reg.merge_req(io.req.bits)
  assert(PopCount(Seq(req_pipeline_reg_handled, VecInit(req_mshr_handled_vec).asUInt.orR)) <= 1.U, "miss req will either go to mshr or pipeline reg")
  assert(PopCount(req_mshr_handled_vec) <= 1.U, "Only one mshr can handle a req")
  io.resp.id := Mux(!req_pipeline_reg_handled, OHToUInt(req_mshr_handled_vec), miss_req_pipe_reg.mshr_id)
  io.resp.handled := Cat(req_mshr_handled_vec).orR || req_pipeline_reg_handled
  io.resp.merged := merge
  io.resp.repl_way_en := Mux(!req_pipeline_reg_handled, Mux1H(secondary_ready_vec, entries.map(_.io.repl_way_en)), miss_req_pipe_reg.req.way_en)

  /* MissQueue enq logic is now split into 2 cycles
   *
   */
  miss_req_pipe_reg.req := io.req.bits
  miss_req_pipe_reg.alloc := alloc && io.req.valid && !io.req.bits.cancel
  miss_req_pipe_reg.merge := merge && io.req.valid && !io.req.bits.cancel
  miss_req_pipe_reg.mshr_id := io.resp.id

  assert(PopCount(Seq(alloc && io.req.valid, merge && io.req.valid)) <= 1.U, "allocate and merge a mshr in the same cycle!")
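  // Hedged software model of the enq decision above (an assumption, for illustration
  // only): an incoming miss either merges into a matching mshr / the pipeline reg,
  // is blocked by a set/way conflict, or allocates a free mshr; merging wins over
  // rejection, and a reject also blocks allocation.
  def enqDecisionModel(canMerge: Boolean, isRejected: Boolean, anyEntryFree: Boolean): String =
    if (canMerge) "merge"            // accepted, folded into an existing miss
    else if (isRejected) "blocked"   // set/way conflict, must retry later
    else if (anyEntryFree) "alloc"   // accepted, a new mshr is allocated
    else "blocked"                   // all mshrs busy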

  val forwardInfo_vec = VecInit(entries.map(_.io.forwardInfo))
  (0 until LoadPipelineWidth).map(i => {
    val id = io.forward(i).mshrid
    val req_valid = io.forward(i).valid
    val paddr = io.forward(i).paddr

    val (forward_mshr, forwardData) = forwardInfo_vec(id).forward(req_valid, paddr)
    io.forward(i).forward_result_valid := forwardInfo_vec(id).check(req_valid, paddr)
    io.forward(i).forward_mshr := forward_mshr
    io.forward(i).forwardData := forwardData
  })

  assert(RegNext(PopCount(secondary_ready_vec) <= 1.U))
//  assert(RegNext(PopCount(secondary_reject_vec) <= 1.U))
  // It is possible that one mshr wants to merge a req, while another mshr wants to reject it.
  // That is, an incoming req has the same paddr as that of mshr_0 (merge),
  // while it has the same set and the same way as mshr_1 (reject).
  // In this situation, the incoming req should be merged by mshr_0
//  assert(RegNext(PopCount(Seq(merge, reject)) <= 1.U))

  def select_valid_one[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {

    if (name.nonEmpty) { out.suggestName(s"${name.get}_select") }
    out.valid := Cat(in.map(_.valid)).orR
    out.bits := ParallelMux(in.map(_.valid) zip in.map(_.bits))
    in.map(_.ready := out.ready)
    assert(!RegNext(out.valid && PopCount(Cat(in.map(_.valid))) > 1.U))
  }

  io.mem_grant.ready := false.B

  entries.zipWithIndex.foreach {
    case (e, i) =>
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR

      e.io.hartId := io.hartId
      e.io.id := i.U
      e.io.l2_pf_store_only := io.l2_pf_store_only
      e.io.req.valid := io.req.valid
      e.io.primary_valid := io.req.valid &&
        !merge &&
        !reject &&
        !former_primary_ready &&
        e.io.primary_ready
      e.io.req.bits := io.req.bits.toMissReqWoStoreData()

      e.io.mem_grant.valid := false.B
      e.io.mem_grant.bits := DontCare
      when (io.mem_grant.bits.source === i.U) {
        e.io.mem_grant <> io.mem_grant
      }

      when(miss_req_pipe_reg.reg_valid() && miss_req_pipe_reg.mshr_id === i.U) {
        e.io.miss_req_pipe_reg := miss_req_pipe_reg
      }.otherwise {
        e.io.miss_req_pipe_reg := DontCare
        e.io.miss_req_pipe_reg.merge := false.B
        e.io.miss_req_pipe_reg.alloc := false.B
      }

      e.io.acquire_fired_by_pipe_reg := acquire_from_pipereg.fire

      e.io.refill_pipe_resp := io.refill_pipe_resp.valid && io.refill_pipe_resp.bits === i.U
      e.io.replace_pipe_resp := io.replace_pipe_resp.valid && io.replace_pipe_resp.bits === i.U
      e.io.main_pipe_resp := io.main_pipe_resp.valid && io.main_pipe_resp.bits.ack_miss_queue && io.main_pipe_resp.bits.miss_id === i.U

      io.debug_early_replace(i) := e.io.debug_early_replace
      e.io.main_pipe_req.ready := io.main_pipe_req.ready
  }
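  // Hedged sketch (an assumption, for illustration only): the former_primary_ready
  // chain above simply grants primary_valid to the lowest-indexed free entry.
  // In software terms:
  def firstFreeEntryModel(primaryReady: Seq[Boolean]): Option[Int] =
    primaryReady.zipWithIndex.collectFirst { case (true, idx) => idx }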

  io.req.ready := accept
  io.mq_enq_cancel := io.req.bits.cancel
  io.refill_to_ldq.valid := Cat(entries.map(_.io.refill_to_ldq.valid)).orR
  io.refill_to_ldq.bits := ParallelMux(entries.map(_.io.refill_to_ldq.valid) zip entries.map(_.io.refill_to_ldq.bits))

  acquire_from_pipereg.valid := miss_req_pipe_reg.can_send_acquire(io.req.valid, io.req.bits)
  acquire_from_pipereg.bits := miss_req_pipe_reg.get_acquire(io.l2_pf_store_only)

  XSPerfAccumulate("acquire_fire_from_pipereg", acquire_from_pipereg.fire)
  XSPerfAccumulate("pipereg_valid", miss_req_pipe_reg.reg_valid())

  val acquire_sources = Seq(acquire_from_pipereg) ++ entries.map(_.io.mem_acquire)
  TLArbiter.lowest(edge, io.mem_acquire, acquire_sources:_*)
  TLArbiter.lowest(edge, io.mem_finish, entries.map(_.io.mem_finish):_*)

  // arbiter_with_pipereg_N_dup(entries.map(_.io.refill_pipe_req), io.refill_pipe_req,
  //   io.refill_pipe_req_dup,
  //   Some("refill_pipe_req"))
  val out_refill_pipe_req = Wire(Decoupled(new RefillPipeReq))
  val out_refill_pipe_req_ctrl = Wire(Decoupled(new RefillPipeReqCtrl))
  out_refill_pipe_req_ctrl.valid := out_refill_pipe_req.valid
  out_refill_pipe_req_ctrl.bits := out_refill_pipe_req.bits.getCtrl
  out_refill_pipe_req.ready := out_refill_pipe_req_ctrl.ready
  arbiter(entries.map(_.io.refill_pipe_req), out_refill_pipe_req, Some("refill_pipe_req"))
  for (dup <- io.refill_pipe_req_dup) {
    AddPipelineReg(out_refill_pipe_req_ctrl, dup, false.B)
  }
  AddPipelineReg(out_refill_pipe_req, io.refill_pipe_req, false.B)

  arbiter_with_pipereg(entries.map(_.io.replace_pipe_req), io.replace_pipe_req, Some("replace_pipe_req"))

  // amo's main pipe req out
  val main_pipe_req_vec = entries.map(_.io.main_pipe_req)
  io.main_pipe_req.valid := VecInit(main_pipe_req_vec.map(_.valid)).asUInt.orR
  io.main_pipe_req.bits := Mux1H(main_pipe_req_vec.map(_.valid), main_pipe_req_vec.map(_.bits))
  assert(PopCount(VecInit(main_pipe_req_vec.map(_.valid))) <= 1.U, "multi main pipe req")

  io.probe_block := Cat(probe_block_vec).orR

  io.full := ~Cat(entries.map(_.io.primary_ready)).andR

  // L1MissTrace Chisel DB
  val debug_miss_trace = Wire(new L1MissTrace)
  debug_miss_trace.vaddr := io.req.bits.vaddr
  debug_miss_trace.paddr := io.req.bits.addr
  debug_miss_trace.source := io.req.bits.source
  debug_miss_trace.pc := io.req.bits.pc

  val isWriteL1MissQMissTable = WireInit(Constantin.createRecord("isWriteL1MissQMissTable" + p(XSCoreParamsKey).HartId.toString))
  val table = ChiselDB.createTable("L1MissQMissTrace_hart"+ p(XSCoreParamsKey).HartId.toString, new L1MissTrace)
  table.log(debug_miss_trace, isWriteL1MissQMissTable.orR && io.req.valid && !io.req.bits.cancel && alloc, "MissQueue", clock, reset)

  // Difftest
  if (env.EnableDifftest) {
    val difftest = Module(new DifftestRefillEvent)
    difftest.io.clock := clock
    difftest.io.coreid := io.hartId
    difftest.io.cacheid := 1.U
    difftest.io.valid := io.refill_to_ldq.valid && io.refill_to_ldq.bits.hasdata && io.refill_to_ldq.bits.refill_done
    difftest.io.addr := io.refill_to_ldq.bits.addr
    difftest.io.data := io.refill_to_ldq.bits.data_raw.asTypeOf(difftest.io.data)
  }

  // Perf count
  XSPerfAccumulate("miss_req", io.req.fire() && !io.req.bits.cancel)
  XSPerfAccumulate("miss_req_allocate", io.req.fire() && !io.req.bits.cancel && alloc)
  XSPerfAccumulate("miss_req_load_allocate", io.req.fire() && !io.req.bits.cancel && alloc && io.req.bits.isFromLoad)
  XSPerfAccumulate("miss_req_store_allocate", io.req.fire() && !io.req.bits.cancel && alloc && io.req.bits.isFromStore)
  XSPerfAccumulate("miss_req_amo_allocate", io.req.fire() && !io.req.bits.cancel && alloc && io.req.bits.isFromAMO)
  XSPerfAccumulate("miss_req_merge_load", io.req.fire() && !io.req.bits.cancel && merge && io.req.bits.isFromLoad)
  XSPerfAccumulate("miss_req_reject_load", io.req.valid && !io.req.bits.cancel && reject && io.req.bits.isFromLoad)
  XSPerfAccumulate("probe_blocked_by_miss", io.probe_block)
  XSPerfAccumulate("prefetch_primary_fire", io.req.fire() && !io.req.bits.cancel && alloc && io.req.bits.isFromPrefetch)
  XSPerfAccumulate("prefetch_secondary_fire", io.req.fire() && !io.req.bits.cancel && merge && io.req.bits.isFromPrefetch)

  val max_inflight = RegInit(0.U((log2Up(cfg.nMissEntries) + 1).W))
  val num_valids = PopCount(~Cat(primary_ready_vec).asUInt)
  when (num_valids > max_inflight) {
    max_inflight := num_valids
  }
  // max inflight (average) = max_inflight_total / cycle cnt
  XSPerfAccumulate("max_inflight", max_inflight)
  QueuePerf(cfg.nMissEntries, num_valids, num_valids === cfg.nMissEntries.U)
  io.full := num_valids === cfg.nMissEntries.U
  XSPerfHistogram("num_valids", num_valids, true.B, 0, cfg.nMissEntries, 1)

  XSPerfHistogram("L1DMLP_CPUData", PopCount(VecInit(entries.map(_.io.perf_pending_normal)).asUInt), true.B, 0, cfg.nMissEntries, 1)
  XSPerfHistogram("L1DMLP_Prefetch", PopCount(VecInit(entries.map(_.io.perf_pending_prefetch)).asUInt), true.B, 0, cfg.nMissEntries, 1)
  XSPerfHistogram("L1DMLP_Total", num_valids, true.B, 0, cfg.nMissEntries, 1)

  XSPerfAccumulate("miss_load_refill_latency", PopCount(entries.map(_.io.latency_monitor.load_miss_refilling)))
  XSPerfAccumulate("miss_store_refill_latency", PopCount(entries.map(_.io.latency_monitor.store_miss_refilling)))
  XSPerfAccumulate("miss_amo_refill_latency", PopCount(entries.map(_.io.latency_monitor.amo_miss_refilling)))
  XSPerfAccumulate("miss_pf_refill_latency", PopCount(entries.map(_.io.latency_monitor.pf_miss_refilling)))

  val rob_head_miss_in_dcache = VecInit(entries.map(_.io.rob_head_query.resp)).asUInt.orR
  val sourceVaddr = WireInit(0.U.asTypeOf(new Valid(UInt(VAddrBits.W))))
  val lq_doing_other_replay = WireInit(false.B)

  ExcitingUtils.addSink(sourceVaddr, s"rob_head_vaddr_${coreParams.HartId}", ExcitingUtils.Perf)
  ExcitingUtils.addSink(lq_doing_other_replay, s"rob_head_other_replay_${coreParams.HartId}", ExcitingUtils.Perf)

  entries.foreach {
    case e => {
      e.io.rob_head_query.query_valid := sourceVaddr.valid
      e.io.rob_head_query.vaddr := sourceVaddr.bits
    }
  }

  // ExcitingUtils.addSource(!rob_head_miss_in_dcache && !lq_doing_other_replay, s"load_l1_cache_stall_without_bank_conflict_${coreParams.HartId}", ExcitingUtils.Perf, true)
  ExcitingUtils.addSource(rob_head_miss_in_dcache, s"load_l1_miss_${coreParams.HartId}", ExcitingUtils.Perf, true)

  val perfValidCount = RegNext(PopCount(entries.map(entry => (!entry.io.primary_ready))))
  val perfEvents = Seq(
    ("dcache_missq_req      ", io.req.fire()),
    ("dcache_missq_1_4_valid", (perfValidCount < (cfg.nMissEntries.U/4.U))),
    ("dcache_missq_2_4_valid", (perfValidCount > (cfg.nMissEntries.U/4.U)) & (perfValidCount <= (cfg.nMissEntries.U/2.U))),
    ("dcache_missq_3_4_valid", (perfValidCount > (cfg.nMissEntries.U/2.U)) & (perfValidCount <= (cfg.nMissEntries.U*3.U/4.U))),
    ("dcache_missq_4_4_valid", (perfValidCount > (cfg.nMissEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}