/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.experimental.ExtModule
import chisel3.util._
import coupledL2.VaddrField
import coupledL2.IsKeywordField
import coupledL2.IsKeywordKey
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.BundleFieldBase
import huancun.{AliasField, PrefetchField}
import org.chipsalliance.cde.config.Parameters
import utility._
import utils._
import xiangshan._
import xiangshan.backend.Bundles.DynInst
import xiangshan.backend.rob.RobDebugRollingIO
import xiangshan.cache.wpu._
import xiangshan.mem.{AddPipelineReg, HasL1PrefetchSourceParameter}
import xiangshan.mem.prefetch._
import xiangshan.mem.LqPtr

// DCache specific parameters
case class DCacheParameters
(
  nSets: Int = 128,
  nWays: Int = 8,
  rowBits: Int = 64,
  tagECC: Option[String] = None,
  dataECC: Option[String] = None,
  replacer: Option[String] = Some("setplru"),
  updateReplaceOn2ndmiss: Boolean = true,
  nMissEntries: Int = 1,
  nProbeEntries: Int = 1,
  nReleaseEntries: Int = 1,
  nMMIOEntries: Int = 1,
  nMMIOs: Int = 1,
  blockBytes: Int = 64,
  nMaxPrefetchEntry: Int = 1,
  alwaysReleaseData: Boolean = false,
  isKeywordBitsOpt: Option[Boolean] = Some(true),
  enableDataEcc: Boolean = false,
  enableTagEcc: Boolean = false,
  cacheCtrlAddressOpt: Option[AddressSet] = None,
) extends L1CacheParameters {
  // If nSets * blockBytes > 4KB (the page size), cache aliasing can occur;
  // we avoid this by recording additional (alias) bits in the L2 cache.
  val setBytes = nSets * blockBytes
  val aliasBitsOpt = if(setBytes > pageSize) Some(log2Ceil(setBytes / pageSize)) else None

  def tagCode: Code = Code.fromString(tagECC)

  def dataCode: Code = Code.fromString(dataECC)
}

// Physical Address
// --------------------------------------
// |  Physical Tag |  PIndex  | Offset |
// --------------------------------------
//                 |
//                 DCacheTagOffset
//
// Virtual Address
// --------------------------------------
// | Above index | Set | Bank | Offset |
// --------------------------------------
//               |     |      |        |
//               |     |      |        0
//               |     |      DCacheBankOffset
//               |     DCacheSetOffset
//               DCacheAboveIndexOffset

// Default DCache size = 64 sets * 8 ways * 8 banks * 8 Byte = 32K Byte
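// Worked example (illustrative, for the 64-set layout sketched above): with
// 8B SRAM rows and 8 banks, the in-row Offset is vaddr(2,0), Bank is
// vaddr(5,3), and Set is vaddr(11,6), so the whole index sits below the 4KB
// page offset and aliasBitsOpt = None. With the case-class default of
// nSets = 128, setBytes = 128 * 64 = 8KB > 4KB, so one alias bit, vaddr(12),
// must be recorded in L2 (aliasBitsOpt = Some(log2Ceil(8192 / 4096)) = Some(1)).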
trait HasDCacheParameters extends HasL1CacheParameters with HasL1PrefetchSourceParameter {
  val cacheParams = dcacheParameters
  val cfg = cacheParams

  def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant

  def nSourceType = 10
  def sourceTypeWidth = log2Up(nSourceType)
  // non-prefetch sources < 3
  def LOAD_SOURCE = 0
  def STORE_SOURCE = 1
  def AMO_SOURCE = 2
  // prefetch sources >= 3
  def DCACHE_PREFETCH_SOURCE = 3
  def SOFT_PREFETCH = 4
  // the following sources are only used inside SMS
  def HW_PREFETCH_AGT = 5
  def HW_PREFETCH_PHT_CUR = 6
  def HW_PREFETCH_PHT_INC = 7
  def HW_PREFETCH_PHT_DEC = 8
  def HW_PREFETCH_BOP = 9
  def HW_PREFETCH_STRIDE = 10

  def BLOOM_FILTER_ENTRY_NUM = 4096

  // each source uses an id to distinguish its multiple reqs
  def reqIdWidth = log2Up(nEntries) max log2Up(StoreBufferSize)

  require(isPow2(cfg.nMissEntries)) // TODO
  // require(isPow2(cfg.nReleaseEntries))
  require(cfg.nMissEntries < cfg.nReleaseEntries)
  val nEntries = cfg.nMissEntries + cfg.nReleaseEntries
  val releaseIdBase = cfg.nMissEntries
  val EnableDataEcc = cacheParams.enableDataEcc
  val EnableTagEcc = cacheParams.enableTagEcc

  // banked dcache support
  val DCacheSetDiv = 1
  val DCacheSets = cacheParams.nSets
  val DCacheWays = cacheParams.nWays
  val DCacheBanks = 8 // hardcoded
  val DCacheDupNum = 16
  val DCacheSRAMRowBits = cacheParams.rowBits // hardcoded
  val DCacheWordBits = 64 // hardcoded
  val DCacheWordBytes = DCacheWordBits / 8
  val MaxPrefetchEntry = cacheParams.nMaxPrefetchEntry
  val DCacheVWordBytes = VLEN / 8
  require(DCacheSRAMRowBits == 64)

  val DCacheSetDivBits = log2Ceil(DCacheSetDiv)
  val DCacheSetBits = log2Ceil(DCacheSets)
  val DCacheSizeBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
  val DCacheSizeBytes = DCacheSizeBits / 8
  val DCacheSizeWords = DCacheSizeBits / 64 // TODO

  val DCacheSameVPAddrLength = 12

  val DCacheSRAMRowBytes = DCacheSRAMRowBits / 8
  val DCacheWordOffset = log2Up(DCacheWordBytes)
  val DCacheVWordOffset = log2Up(DCacheVWordBytes)

  val DCacheBankOffset = log2Up(DCacheSRAMRowBytes)
  val DCacheSetOffset = DCacheBankOffset + log2Up(DCacheBanks)
  val DCacheAboveIndexOffset = DCacheSetOffset + log2Up(DCacheSets)
  val DCacheTagOffset = DCacheAboveIndexOffset min DCacheSameVPAddrLength
  val DCacheLineOffset = DCacheSetOffset

  def encWordBits = cacheParams.dataCode.width(wordBits)
  def encRowBits = encWordBits * rowWords // for DuplicatedDataArray only
  def eccBits = encWordBits - wordBits

  def encTagBits = if (EnableTagEcc) cacheParams.tagCode.width(tagBits) else tagBits
  def tagECCBits = encTagBits - tagBits

  def encDataBits = if (EnableDataEcc) cacheParams.dataCode.width(DCacheSRAMRowBits) else DCacheSRAMRowBits
  def dataECCBits = encDataBits - DCacheSRAMRowBits
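
  // Worked example (illustrative): with enableDataEcc = true and
  // dataECC = Some("secded"), a 64-bit SRAM row is stored as a 72-bit
  // codeword (a SECDED code adds 8 check bits to 64 data bits), so
  // encDataBits = 72 and dataECCBits = 8. With ECC disabled, both collapse
  // to the raw widths: encDataBits = 64, dataECCBits = 0.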

  // L1 DCache controller
  val cacheCtrlParamsOpt = OptionWrapper(
    cacheParams.cacheCtrlAddressOpt.nonEmpty,
    L1CacheCtrlParams(cacheParams.cacheCtrlAddressOpt.get)
  )
  // uncache
  val uncacheIdxBits = log2Up(VirtualLoadQueueMaxStoreQueueSize + 1)
  // hardware prefetch parameters
  // high confidence hardware prefetch port
  val HighConfHWPFLoadPort = LoadPipelineWidth - 1 // use the last load port by default
  val IgnorePrefetchConfidence = false

  // parameters about duplicating regs to solve fanout
  // In Main Pipe:
  //   tag_write.ready -> data_write.valid * 8 banks
  //   tag_write.ready -> meta_write.valid
  //   tag_write.ready -> tag_write.valid
  //   tag_write.ready -> err_write.valid
  //   tag_write.ready -> wb.valid
  val nDupTagWriteReady = DCacheBanks + 4
  // In Main Pipe:
  //   data_write.ready -> data_write.valid * 8 banks
  //   data_write.ready -> meta_write.valid
  //   data_write.ready -> tag_write.valid
  //   data_write.ready -> err_write.valid
  //   data_write.ready -> wb.valid
  val nDupDataWriteReady = DCacheBanks + 4
  val nDupWbReady = DCacheBanks + 4
  val nDupStatus = nDupTagWriteReady + nDupDataWriteReady
  val dataWritePort = 0
  val metaWritePort = DCacheBanks
  val tagWritePort = metaWritePort + 1
  val errWritePort = tagWritePort + 1
  val wbPort = errWritePort + 1

  def set_to_dcache_div(set: UInt) = {
    require(set.getWidth >= DCacheSetBits)
    if (DCacheSetDivBits == 0) 0.U else set(DCacheSetDivBits-1, 0)
  }

  def set_to_dcache_div_set(set: UInt) = {
    require(set.getWidth >= DCacheSetBits)
    set(DCacheSetBits - 1, DCacheSetDivBits)
  }

  def addr_to_dcache_bank(addr: UInt) = {
    require(addr.getWidth >= DCacheSetOffset)
    addr(DCacheSetOffset-1, DCacheBankOffset)
  }

  def addr_to_dcache_div(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    if(DCacheSetDivBits == 0) 0.U else addr(DCacheSetOffset + DCacheSetDivBits - 1, DCacheSetOffset)
  }

  def addr_to_dcache_div_set(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    addr(DCacheAboveIndexOffset - 1, DCacheSetOffset + DCacheSetDivBits)
  }

  def addr_to_dcache_set(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    addr(DCacheAboveIndexOffset-1, DCacheSetOffset)
  }

  def get_data_of_bank(bank: Int, data: UInt) = {
    require(data.getWidth >= (bank+1)*DCacheSRAMRowBits)
    data(DCacheSRAMRowBits * (bank + 1) - 1, DCacheSRAMRowBits * bank)
  }

  def get_mask_of_bank(bank: Int, data: UInt) = {
    require(data.getWidth >= (bank+1)*DCacheSRAMRowBytes)
    data(DCacheSRAMRowBytes * (bank + 1) - 1, DCacheSRAMRowBytes * bank)
  }

  def get_alias(vaddr: UInt): UInt = {
    // require(blockOffBits + idxBits > pgIdxBits)
    if(blockOffBits + idxBits > pgIdxBits) {
      vaddr(blockOffBits + idxBits - 1, pgIdxBits)
    } else {
      0.U
    }
  }

  def is_alias_match(vaddr0: UInt, vaddr1: UInt): Bool = {
    require(vaddr0.getWidth == VAddrBits && vaddr1.getWidth == VAddrBits)
    if(blockOffBits + idxBits > pgIdxBits) {
      vaddr0(blockOffBits + idxBits - 1, pgIdxBits) === vaddr1(blockOffBits + idxBits - 1, pgIdxBits)
    } else {
      // no alias problem
      true.B
    }
  }

  def get_direct_map_way(addr: UInt): UInt = {
    addr(DCacheAboveIndexOffset + log2Up(DCacheWays) - 1, DCacheAboveIndexOffset)
  }

  def arbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  def arbiter_with_pipereg[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    AddPipelineReg(arb.io.out, out, false.B)
  }
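
  // Usage sketch (illustrative, names are hypothetical): funnel several
  // DecoupledIO producers into one consumer, optionally cutting the path
  // with a pipeline register for timing:
  //
  //   val mergedReq = Wire(DecoupledIO(new DCacheLineReq))
  //   arbiter(Seq(port0, port1), mergedReq, Some("line_req"))
  //   // or, with a register stage on the arbitrated output:
  //   arbiter_with_pipereg(Seq(port0, port1), mergedReq, Some("line_req"))
  //
  // Lower indices in `in` win, matching chisel3.util.Arbiter's fixed priority.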

  def arbiter_with_pipereg_N_dup[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    dups: Seq[DecoupledIO[T]],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    for (dup <- dups) {
      AddPipelineReg(arb.io.out, dup, false.B)
    }
    AddPipelineReg(arb.io.out, out, false.B)
  }

  def rrArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new RRArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  def fastArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  val numReplaceRespPorts = 2

  require(isPow2(nSets), s"nSets($nSets) must be pow2")
  require(isPow2(nWays), s"nWays($nWays) must be pow2")
  require(full_divide(rowBits, wordBits), s"rowBits($rowBits) must be multiple of wordBits($wordBits)")
  require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
}

abstract class DCacheModule(implicit p: Parameters) extends L1CacheModule
  with HasDCacheParameters

abstract class DCacheBundle(implicit p: Parameters) extends L1CacheBundle
  with HasDCacheParameters

class ReplacementAccessBundle(implicit p: Parameters) extends DCacheBundle {
  val set = UInt(log2Up(nSets).W)
  val way = UInt(log2Up(nWays).W)
}

class ReplacementWayReqIO(implicit p: Parameters) extends DCacheBundle {
  val set = ValidIO(UInt(log2Up(nSets).W))
  val dmWay = Output(UInt(log2Up(nWays).W))
  val way = Input(UInt(log2Up(nWays).W))
}

class DCacheExtraMeta(implicit p: Parameters) extends DCacheBundle
{
  val error = Bool() // cache line has been marked as corrupted by l2 / ecc error detected when store
  val prefetch = UInt(L1PfSourceBits.W) // cache line is first required by prefetch
  val access = Bool() // cache line has been accessed by load / store

  // val debug_access_timestamp = UInt(64.W) // last time a load / store / refill accessed that cacheline
}

// memory request in word granularity (load, mmio, lr/sc, atomics)
class DCacheWordReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd = UInt(M_SZ.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(VLEN.W)
  val mask = UInt((VLEN/8).W)
  val id = UInt(reqIdWidth.W)
  val instrtype = UInt(sourceTypeWidth.W)
  val isFirstIssue = Bool()
  val replayCarry = new ReplayCarry(nWays)
  val lqIdx = new LqPtr

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheWordReq: cmd: %x vaddr: %x data: %x mask: %x id: %d\n",
      cmd, vaddr, data, mask, id)
  }
}

// memory request in cache line granularity (store)
class DCacheLineReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd = UInt(M_SZ.W)
  val vaddr = UInt(VAddrBits.W)
  val addr = UInt(PAddrBits.W)
  val data = UInt((cfg.blockBytes * 8).W)
  val mask = UInt(cfg.blockBytes.W)
  val id = UInt(reqIdWidth.W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheLineReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
      cmd, addr, data, mask, id)
  }
  def idx: UInt = get_idx(vaddr)
}
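// Illustrative note on masks: DCacheLineReq.mask carries one bit per byte of
// the 64B line, while DCacheWordReq.mask covers only VLEN/8 bytes. E.g., a
// store of 8 bytes at line offset 0x8 would (conceptually) set
//   mask := "h0000_0000_0000_ff00".U
// so only bytes 8..15 of the line are written.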

class DCacheWordReqWithVaddr(implicit p: Parameters) extends DCacheWordReq {
  val addr = UInt(PAddrBits.W)
  val wline = Bool()
}

class DCacheWordReqWithVaddrAndPfFlag(implicit p: Parameters) extends DCacheWordReqWithVaddr {
  val prefetch = Bool()
  val vecValid = Bool()
  val sqNeedDeq = Bool()

  def toDCacheWordReqWithVaddr() = {
    val res = Wire(new DCacheWordReqWithVaddr)
    res.vaddr := vaddr
    res.wline := wline
    res.cmd := cmd
    res.addr := addr
    res.data := data
    res.mask := mask
    res.id := id
    res.instrtype := instrtype
    res.replayCarry := replayCarry
    res.isFirstIssue := isFirstIssue
    res.lqIdx := lqIdx // drive the inherited field so the Wire is fully initialized
    res.debug_robIdx := debug_robIdx

    res
  }
}

class BaseDCacheWordResp(implicit p: Parameters) extends DCacheBundle
{
  // read in s2
  val data = UInt(VLEN.W)
  // select in s3
  val data_delayed = UInt(VLEN.W)
  val id = UInt(reqIdWidth.W)
  // cache req missed, send it to miss queue
  val miss = Bool()
  // cache miss, and failed to enter the miss queue, replay from RS is needed
  val replay = Bool()
  val replayCarry = new ReplayCarry(nWays)
  // data has been corrupted
  val tag_error = Bool() // tag error
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W)

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheWordResp: data: %x id: %d miss: %b replay: %b\n",
      data, id, miss, replay)
  }
}

class DCacheWordResp(implicit p: Parameters) extends BaseDCacheWordResp
{
  val meta_prefetch = UInt(L1PfSourceBits.W)
  val meta_access = Bool()
  // s2
  val handled = Bool()
  val real_miss = Bool()
  // s3: 1 cycle after data resp
  val error_delayed = Bool() // all kinds of errors, including tag error
  val replacementUpdated = Bool()
}

class BankedDCacheWordResp(implicit p: Parameters) extends DCacheWordResp
{
  val bank_data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
  val bank_oh = UInt(DCacheBanks.W)
}

class DCacheWordRespWithError(implicit p: Parameters) extends BaseDCacheWordResp
{
  val error = Bool() // all kinds of errors, including tag error
  val nderr = Bool()
}

class DCacheLineResp(implicit p: Parameters) extends DCacheBundle
{
  val data = UInt((cfg.blockBytes * 8).W)
  // cache req missed, send it to miss queue
  val miss = Bool()
  // cache req nacked, replay it later
  val replay = Bool()
  val id = UInt(reqIdWidth.W)
  def dump(cond: Bool) = {
    XSDebug(cond, "DCacheLineResp: data: %x id: %d miss: %b replay: %b\n",
      data, id, miss, replay)
  }
}

class Refill(implicit p: Parameters) extends DCacheBundle
{
  val addr = UInt(PAddrBits.W)
  val data = UInt(l1BusDataWidth.W)
  val error = Bool() // refilled data has been corrupted
  // for debug usage
  val data_raw = UInt((cfg.blockBytes * 8).W)
  val hasdata = Bool()
  val refill_done = Bool()
  def dump(cond: Bool) = {
    XSDebug(cond, "Refill: addr: %x data: %x\n", addr, data)
  }
  val id = UInt(log2Up(cfg.nMissEntries).W)
}

class Release(implicit p: Parameters) extends DCacheBundle
{
  val paddr = UInt(PAddrBits.W)
  def dump(cond: Bool) = {
    XSDebug(cond, "Release: paddr: %x\n", paddr(PAddrBits-1, DCacheTagOffset))
  }
}

class DCacheWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new DCacheWordReq)
  val resp = Flipped(DecoupledIO(new DCacheWordResp))
}
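// Handshake sketch (illustrative): a user of DCacheWordIO typically treats
// the three resp outcomes like this --
//   when (io.resp.fire) {
//     when (io.resp.bits.miss && io.resp.bits.replay) {
//       // miss queue was full: re-issue the request later (replay from RS)
//     } .elsewhen (io.resp.bits.miss) {
//       // miss accepted: wait for the refill, identified by resp.bits.mshr_id
//     } .otherwise {
//       // hit: data is valid in s2, data_delayed one cycle later in s3
//     }
//   }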


class UncacheWordReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W) // for uncache buffer forwarding
  val data = UInt(XLEN.W)
  val mask = UInt((XLEN/8).W)
  val id = UInt(uncacheIdxBits.W)
  val instrtype = UInt(sourceTypeWidth.W)
  val atomic = Bool()
  val nc = Bool()
  val isFirstIssue = Bool()
  val replayCarry = new ReplayCarry(nWays)

  def dump(cond: Bool) = {
    XSDebug(cond, "UncacheWordReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
      cmd, addr, data, mask, id)
  }
}

class UncacheWordResp(implicit p: Parameters) extends DCacheBundle
{
  val data = UInt(XLEN.W)
  val data_delayed = UInt(XLEN.W)
  val id = UInt(uncacheIdxBits.W) // resp identification signals
  val nc = Bool() // resp identification signals
  val is2lq = Bool() // resp identification signals
  val miss = Bool()
  val replay = Bool()
  val tag_error = Bool()
  val error = Bool()
  val nderr = Bool()
  val replayCarry = new ReplayCarry(nWays)
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W) // FIXME: why is UncacheWordResp not merged into BaseDCacheWordResp?

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump(cond: Bool) = {
    XSDebug(cond, "UncacheWordResp: data: %x id: %d miss: %b replay: %b, tag_error: %b, error: %b\n",
      data, id, miss, replay, tag_error, error)
  }
}

class UncacheWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new UncacheWordReq)
  val resp = Flipped(DecoupledIO(new UncacheWordResp))
}

class MainPipeResp(implicit p: Parameters) extends DCacheBundle {
  // distinguish amo
  val source = UInt(sourceTypeWidth.W)
  val data = UInt(QuadWordBits.W)
  val miss = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
  val replay = Bool()
  val error = Bool()

  val ack_miss_queue = Bool()

  val id = UInt(reqIdWidth.W)

  def isAMO: Bool = source === AMO_SOURCE.U
  def isStore: Bool = source === STORE_SOURCE.U
}

class AtomicWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new MainPipeReq)
  val resp = Flipped(ValidIO(new MainPipeResp))
  val block_lr = Input(Bool())
}

class CMOReq(implicit p: Parameters) extends Bundle {
  val opcode = UInt(3.W) // 0-cbo.clean, 1-cbo.flush, 2-cbo.inval, 3-cbo.zero
  val address = UInt(64.W)
}

class CMOResp(implicit p: Parameters) extends Bundle {
  val address = UInt(64.W)
}
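// Usage sketch (illustrative, driver names are hypothetical): issuing a
// cbo.flush for one cache block over the CMO request channel --
//   io.cmoOpReq.valid := cmoValid
//   io.cmoOpReq.bits.opcode := 1.U // cbo.flush, per the encoding above
//   io.cmoOpReq.bits.address := cmoPaddr
//   // completion is signalled on io.cmoOpResp with the same address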

// used by load unit
class DCacheLoadIO(implicit p: Parameters) extends DCacheWordIO
{
  // kill previous cycle's req
  val s1_kill_data_read = Output(Bool()) // only kill bankedDataRead at s1
  val s1_kill = Output(Bool()) // kill loadpipe req at s1
  val s2_kill = Output(Bool())
  val s0_pc = Output(UInt(VAddrBits.W))
  val s1_pc = Output(UInt(VAddrBits.W))
  val s2_pc = Output(UInt(VAddrBits.W))
  // cycle 0: load has updated replacement before
  val replacementUpdated = Output(Bool())
  val is128Req = Bool()
  // cycle 0: prefetch source bits
  val pf_source = Output(UInt(L1PfSourceBits.W))
  // cycle 0: load microop
  // val s0_uop = Output(new MicroOp)
  // cycle 0: virtual address: req.addr
  // cycle 1: physical address: s1_paddr
  val s1_paddr_dup_lsu = Output(UInt(PAddrBits.W)) // lsu side paddr
  val s1_paddr_dup_dcache = Output(UInt(PAddrBits.W)) // dcache side paddr
  val s1_disable_fast_wakeup = Input(Bool())
  // cycle 2: hit signal
  val s2_hit = Input(Bool()) // hit signal for lsu
  val s2_first_hit = Input(Bool())
  val s2_bank_conflict = Input(Bool())
  val s2_wpu_pred_fail = Input(Bool())
  val s2_mq_nack = Input(Bool())

  // debug
  val debug_s1_hit_way = Input(UInt(nWays.W))
  val debug_s2_pred_way_num = Input(UInt(XLEN.W))
  val debug_s2_dm_way_num = Input(UInt(XLEN.W))
  val debug_s2_real_way_num = Input(UInt(XLEN.W))
}

class DCacheLineIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new DCacheLineReq)
  val resp = Flipped(DecoupledIO(new DCacheLineResp))
}

class DCacheToSbufferIO(implicit p: Parameters) extends DCacheBundle {
  // sbuffer will directly send request to dcache main pipe
  val req = Flipped(Decoupled(new DCacheLineReq))

  val main_pipe_hit_resp = ValidIO(new DCacheLineResp)
  //val refill_hit_resp = ValidIO(new DCacheLineResp)

  val replay_resp = ValidIO(new DCacheLineResp)

  //def hit_resps: Seq[ValidIO[DCacheLineResp]] = Seq(main_pipe_hit_resp, refill_hit_resp)
  def hit_resps: Seq[ValidIO[DCacheLineResp]] = Seq(main_pipe_hit_resp)
}

// forward tilelink channel D's data to ldu
class DcacheToLduForwardIO(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val data = UInt(l1BusDataWidth.W)
  val mshrid = UInt(log2Up(cfg.nMissEntries).W)
  val last = Bool()

  def apply(req_valid: Bool, req_data: UInt, req_mshrid: UInt, req_last: Bool) = {
    valid := req_valid
    data := req_data
    mshrid := req_mshrid
    last := req_last
  }

  def dontCare() = {
    valid := false.B
    data := DontCare
    mshrid := DontCare
    last := DontCare
  }

  def forward(req_valid: Bool, req_mshr_id: UInt, req_paddr: UInt) = {
    val all_match = req_valid && valid &&
      req_mshr_id === mshrid &&
      req_paddr(log2Up(refillBytes)) === last
    val forward_D = RegInit(false.B)
    val forwardData = RegInit(VecInit(List.fill(VLEN/8)(0.U(8.W))))

    val block_idx = req_paddr(log2Up(refillBytes) - 1, 3)
    val block_data = Wire(Vec(l1BusDataWidth / 64, UInt(64.W)))
    (0 until l1BusDataWidth / 64).map(i => {
      block_data(i) := data(64 * i + 63, 64 * i)
    })
    val selected_data = Wire(UInt(128.W))
    selected_data := Mux(req_paddr(3), Fill(2, block_data(block_idx)), Cat(block_data(block_idx + 1.U), block_data(block_idx)))

    forward_D := all_match
    for (i <- 0 until VLEN/8) {
      when (all_match) {
        forwardData(i) := selected_data(8 * i + 7, 8 * i)
      }
    }

    (forward_D, forwardData)
  }
}
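// Worked example (illustrative, assuming a 256-bit D channel, i.e.
// refillBytes = 32): for a 16B-aligned access at paddr % 32 == 0x10,
// req_paddr(5) selects the matching beat, block_idx = paddr(4,3) = 2, and
// selected_data = Cat(block_data(3), block_data(2)) returns 64-bit words
// 2 and 3 of that beat. For an 8B access with paddr(3) == 1, the single
// matching word is simply replicated into both halves of selected_data.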

class MissEntryForwardIO(implicit p: Parameters) extends DCacheBundle {
  val inflight = Bool()
  val paddr = UInt(PAddrBits.W)
  val raw_data = Vec(blockRows, UInt(rowBits.W))
  val firstbeat_valid = Bool()
  val lastbeat_valid = Bool()

  def apply(mshr_valid: Bool, mshr_paddr: UInt, mshr_rawdata: Vec[UInt], mshr_first_valid: Bool, mshr_last_valid: Bool) = {
    inflight := mshr_valid
    paddr := mshr_paddr
    raw_data := mshr_rawdata
    firstbeat_valid := mshr_first_valid
    lastbeat_valid := mshr_last_valid
  }

  // check if we can forward from mshr or D channel
  def check(req_valid: Bool, req_paddr: UInt) = {
    RegNext(req_valid && inflight && req_paddr(PAddrBits - 1, blockOffBits) === paddr(PAddrBits - 1, blockOffBits)) // TODO: clock gate(1-bit)
  }

  def forward(req_valid: Bool, req_paddr: UInt) = {
    val all_match = (req_paddr(log2Up(refillBytes)) === 0.U && firstbeat_valid) ||
      (req_paddr(log2Up(refillBytes)) === 1.U && lastbeat_valid)

    val forward_mshr = RegInit(false.B)
    val forwardData = RegInit(VecInit(List.fill(VLEN/8)(0.U(8.W))))

    val block_idx = req_paddr(log2Up(refillBytes), 3)
    val block_data = raw_data

    val selected_data = Wire(UInt(128.W))
    selected_data := Mux(req_paddr(3), Fill(2, block_data(block_idx)), Cat(block_data(block_idx + 1.U), block_data(block_idx)))

    forward_mshr := all_match
    for (i <- 0 until VLEN/8) {
      forwardData(i) := selected_data(8 * i + 7, 8 * i)
    }

    (forward_mshr, forwardData)
  }
}

// forward mshr's data to ldu
class LduToMissqueueForwardIO(implicit p: Parameters) extends DCacheBundle {
  // req
  val valid = Input(Bool())
  val mshrid = Input(UInt(log2Up(cfg.nMissEntries).W))
  val paddr = Input(UInt(PAddrBits.W))
  // resp
  val forward_mshr = Output(Bool())
  val forwardData = Output(Vec(VLEN/8, UInt(8.W)))
  val forward_result_valid = Output(Bool())

  def connect(sink: LduToMissqueueForwardIO) = {
    sink.valid := valid
    sink.mshrid := mshrid
    sink.paddr := paddr
    forward_mshr := sink.forward_mshr
    forwardData := sink.forwardData
    forward_result_valid := sink.forward_result_valid
  }

  def forward() = {
    (forward_result_valid, forward_mshr, forwardData)
  }
}
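// Usage sketch (illustrative, signal names are hypothetical): a load pipe
// queries the miss queue and consumes the result as a tuple one cycle later:
//   io.forward_mshr.valid := s1_valid
//   io.forward_mshr.mshrid := s1_mshr_id
//   io.forward_mshr.paddr := s1_paddr
//   val (fwdValid, fwdFromMshr, fwdData) = io.forward_mshr.forward()
//   // use fwdData only when fwdValid && fwdFromMshr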

class StorePrefetchReq(implicit p: Parameters) extends DCacheBundle {
  val paddr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
}

class DCacheToLsuIO(implicit p: Parameters) extends DCacheBundle {
  val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
  val sta = Vec(StorePipelineWidth, Flipped(new DCacheStoreIO)) // for non-blocking store
  //val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
  val tl_d_channel = Output(new DcacheToLduForwardIO)
  val store = new DCacheToSbufferIO // for sbuffer
  val atomics = Flipped(new AtomicWordIO) // atomics reqs
  val release = ValidIO(new Release) // cacheline release hint for ld-ld violation check
  val forward_D = Output(Vec(LoadPipelineWidth, new DcacheToLduForwardIO))
  val forward_mshr = Vec(LoadPipelineWidth, new LduToMissqueueForwardIO)
}

class DCacheTopDownIO(implicit p: Parameters) extends DCacheBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadMissInDCache = Output(Bool())
  val robHeadOtherReplay = Input(Bool())
}

class DCacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt(hartIdLen.W))
  val l2_pf_store_only = Input(Bool())
  val lsu = new DCacheToLsuIO
  val csr = new L1CacheToCsrIO
  val error = ValidIO(new L1CacheErrorInfo)
  val mshrFull = Output(Bool())
  val memSetPattenDetected = Output(Bool())
  val lqEmpty = Input(Bool())
  val pf_ctrl = Output(new PrefetchControlBundle)
  val force_write = Input(Bool())
  val sms_agt_evict_req = DecoupledIO(new AGTEvictReq)
  val debugTopDown = new DCacheTopDownIO
  val debugRolling = Flipped(new RobDebugRollingIO)
  val l2_hint = Input(Valid(new L2ToL1Hint()))
  val cmoOpReq = Flipped(DecoupledIO(new CMOReq))
  val cmoOpResp = DecoupledIO(new CMOResp)
}

private object ArbiterCtrl {
  def apply(request: Seq[Bool]): Seq[Bool] = request.length match {
    case 0 => Seq()
    case 1 => Seq(true.B)
    case _ => true.B +: request.tail.init.scanLeft(request.head)(_ || _).map(!_)
  }
}

class TreeArbiter[T <: MissReqWoStoreData](val gen: T, val n: Int) extends Module {
  val io = IO(new ArbiterIO(gen, n))

  def selectTree(in: Vec[Valid[T]], sIdx: UInt): Tuple2[UInt, T] = {
    if (in.length == 1) {
      (sIdx, in(0).bits)
    } else if (in.length == 2) {
      (
        Mux(in(0).valid, sIdx, sIdx + 1.U),
        Mux(in(0).valid, in(0).bits, in(1).bits)
      )
    } else {
      val half = in.length / 2
      val leftValid = in.slice(0, half).map(_.valid).reduce(_ || _)
      val (leftIdx, leftSel) = selectTree(VecInit(in.slice(0, half)), sIdx)
      val (rightIdx, rightSel) = selectTree(VecInit(in.slice(half, in.length)), sIdx + half.U)
      (
        Mux(leftValid, leftIdx, rightIdx),
        Mux(leftValid, leftSel, rightSel)
      )
    }
  }
  val ins = Wire(Vec(n, Valid(gen)))
  for (i <- 0 until n) {
    ins(i).valid := io.in(i).valid
    ins(i).bits := io.in(i).bits
  }
  val (idx, sel) = selectTree(ins, 0.U)
  // NOTE: io.chosen is very slow, don't use it
  io.chosen := idx
  io.out.bits := sel

  val grant = ArbiterCtrl(io.in.map(_.valid))
  for ((in, g) <- io.in.zip(grant))
    in.ready := g && io.out.ready
  io.out.valid := !grant.last || io.in.last.valid
}

class DCacheMEQueryIOBundle(implicit p: Parameters) extends DCacheBundle
{
  val req = ValidIO(new MissReqWoStoreData)
  val primary_ready = Input(Bool())
  val secondary_ready = Input(Bool())
  val secondary_reject = Input(Bool())
}

class DCacheMQQueryIOBundle(implicit p: Parameters) extends DCacheBundle
{
  val req = ValidIO(new MissReq)
  val ready = Input(Bool())
}

class MissReadyGen(val n: Int)(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    val in = Vec(n, Flipped(DecoupledIO(new MissReq)))
    val queryMQ = Vec(n, new DCacheMQQueryIOBundle)
  })

  val mqReadyVec = io.queryMQ.map(_.ready)

  io.queryMQ.zipWithIndex.foreach {
    case (q, idx) => {
      q.req.valid := io.in(idx).valid
      q.req.bits := io.in(idx).bits
    }
  }
  io.in.zipWithIndex.map {
    case (r, idx) => {
      if (idx == 0) {
        r.ready := mqReadyVec(idx)
      } else {
        r.ready := mqReadyVec(idx) && !Cat(io.in.slice(0, idx).map(_.valid)).orR
      }
    }
  }

}
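// Worked example (illustrative): both TreeArbiter and MissReadyGen implement
// the same fixed lowest-index-wins priority. With n = 4 and
// valids = Seq(false, true, false, true):
//   * ArbiterCtrl yields grant = Seq(true, true, false, false), so only
//     in(1).ready can assert (in(3) loses to the pending in(1));
//   * selectTree walks the two halves, sees the left half valid, and returns
//     (idx = 1, bits = in(1).bits) through a balanced Mux tree instead of a
//     priority chain, which is what makes io.out.bits fast here.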

class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {
  override def shouldBeInlined: Boolean = false

  val reqFields: Seq[BundleFieldBase] = Seq(
    PrefetchField(),
    ReqSourceField(),
    VaddrField(VAddrBits - blockOffBits),
    // IsKeywordField()
  ) ++ cacheParams.aliasBitsOpt.map(AliasField)
  val echoFields: Seq[BundleFieldBase] = Seq(
    IsKeywordField()
  )

  val clientParameters = TLMasterPortParameters.v1(
    Seq(TLMasterParameters.v1(
      name = "dcache",
      sourceId = IdRange(0, nEntries + 1),
      supportsProbe = TransferSizes(cfg.blockBytes)
    )),
    requestFields = reqFields,
    echoFields = echoFields
  )

  val clientNode = TLClientNode(Seq(clientParameters))
  val cacheCtrlOpt = cacheCtrlParamsOpt.map(params => LazyModule(new CtrlUnit(params)))

  lazy val module = new DCacheImp(this)
}


class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasPerfEvents with HasL1PrefetchSourceParameter {

  val io = IO(new DCacheIO)

  val (bus, edge) = outer.clientNode.out.head
  require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")

  println("DCache:")
  println(" DCacheSets: " + DCacheSets)
  println(" DCacheSetDiv: " + DCacheSetDiv)
  println(" DCacheWays: " + DCacheWays)
  println(" DCacheBanks: " + DCacheBanks)
  println(" DCacheSRAMRowBits: " + DCacheSRAMRowBits)
  println(" DCacheWordOffset: " + DCacheWordOffset)
  println(" DCacheBankOffset: " + DCacheBankOffset)
  println(" DCacheSetOffset: " + DCacheSetOffset)
  println(" DCacheTagOffset: " + DCacheTagOffset)
  println(" DCacheAboveIndexOffset: " + DCacheAboveIndexOffset)
  println(" DcacheMaxPrefetchEntry: " + MaxPrefetchEntry)
  println(" WPUEnable: " + dwpuParam.enWPU)
  println(" WPUEnableCfPred: " + dwpuParam.enCfPred)
  println(" WPUAlgorithm: " + dwpuParam.algoName)
  println(" HasCMO: " + HasCMO)

  // Enable L1 store prefetch
  val StorePrefetchL1Enabled = EnableStorePrefetchAtCommit || EnableStorePrefetchAtIssue || EnableStorePrefetchSPB
  val MetaReadPort =
    if (StorePrefetchL1Enabled)
      1 + backendParams.LduCnt + backendParams.StaCnt + backendParams.HyuCnt
    else
      1 + backendParams.LduCnt + backendParams.HyuCnt
  val TagReadPort =
    if (StorePrefetchL1Enabled)
      1 + backendParams.LduCnt + backendParams.StaCnt + backendParams.HyuCnt
    else
      1 + backendParams.LduCnt + backendParams.HyuCnt

  // Enable L1 load prefetch
  val LoadPrefetchL1Enabled = true
  val AccessArrayReadPort = if(LoadPrefetchL1Enabled) LoadPipelineWidth + 1 + 1 else LoadPipelineWidth + 1
  val PrefetchArrayReadPort = if(LoadPrefetchL1Enabled) LoadPipelineWidth + 1 + 1 else LoadPipelineWidth + 1

  //----------------------------------------
  // core data structures
  val bankedDataArray = if(dwpuParam.enWPU) Module(new SramedDataArray) else Module(new BankedDataArray)
  val metaArray = Module(new L1CohMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1))
  val errorArray = Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 1))
  val prefetchArray = Module(new L1PrefetchSourceArray(readPorts = PrefetchArrayReadPort, writePorts = 1 + LoadPipelineWidth)) // prefetch flag array
  val accessArray = Module(new L1FlagMetaArray(readPorts = AccessArrayReadPort, writePorts = LoadPipelineWidth + 1))
  val tagArray = Module(new DuplicatedTagArray(readPorts = TagReadPort))
  val prefetcherMonitor = Module(new PrefetcherMonitor)
  val fdpMonitor = Module(new FDPrefetcherMonitor)
  val bloomFilter = Module(new BloomFilter(BLOOM_FILTER_ENTRY_NUM, true))
  val counterFilter = Module(new CounterFilter)
  bankedDataArray.dump()

  //----------------------------------------
  // miss queue
  // missReqArb ports:
  //   store prefetch enabled:  main pipe * 1 + load pipe * 2 + store pipe * 1 + hybrid * 1
  //   store prefetch disabled: main pipe * 1 + load pipe * 2 + hybrid * 1
  // higher priority is given to lower indices
  val MissReqPortCount = if(StorePrefetchL1Enabled) 1 + backendParams.LduCnt + backendParams.StaCnt + backendParams.HyuCnt else 1 + backendParams.LduCnt + backendParams.HyuCnt
  val MainPipeMissReqPort = 0
  val HybridMissReqBase = MissReqPortCount - backendParams.HyuCnt

  //----------------------------------------
  // core modules
  val ldu = Seq.tabulate(LoadPipelineWidth)({ i => Module(new LoadPipe(i)) })
  val stu = Seq.tabulate(StorePipelineWidth)({ i => Module(new StorePipe(i)) })
  val mainPipe = Module(new MainPipe)
  // val refillPipe = Module(new RefillPipe)
  val missQueue = Module(new MissQueue(edge, MissReqPortCount))
  val probeQueue = Module(new ProbeQueue(edge))
  val wb = Module(new WritebackQueue(edge))

  missQueue.io.lqEmpty := io.lqEmpty
  missQueue.io.hartId := io.hartId
  missQueue.io.l2_pf_store_only := RegNext(io.l2_pf_store_only, false.B)
  missQueue.io.debugTopDown <> io.debugTopDown
  missQueue.io.l2_hint <> RegNext(io.l2_hint)
  missQueue.io.mainpipe_info := mainPipe.io.mainpipe_info
  mainPipe.io.refill_info := missQueue.io.refill_info
  mainPipe.io.replace_block := missQueue.io.replace_block
  mainPipe.io.sms_agt_evict_req <> io.sms_agt_evict_req
  io.memSetPattenDetected := missQueue.io.memSetPattenDetected

  // l1 dcache controller
  outer.cacheCtrlOpt.foreach {
    case mod =>
      mod.module.io_pseudoError.foreach {
        case x => x.ready := false.B
      }
  }
  ldu.foreach {
    case mod =>
      mod.io.pseudo_error.valid := false.B
      mod.io.pseudo_error.bits := DontCare
  }
  mainPipe.io.pseudo_error.valid := false.B
  mainPipe.io.pseudo_error.bits := DontCare
  bankedDataArray.io.pseudo_error.valid := false.B
  bankedDataArray.io.pseudo_error.bits := DontCare

  // pseudo tag ecc error
  if (outer.cacheCtrlOpt.nonEmpty && EnableTagEcc) {
    val ctrlUnit = outer.cacheCtrlOpt.head.module
    ldu.map(mod => mod.io.pseudo_error <> ctrlUnit.io_pseudoError(0))
    mainPipe.io.pseudo_error <> ctrlUnit.io_pseudoError(0)
    ctrlUnit.io_pseudoError(0).ready := mainPipe.io.pseudo_tag_error_inj_done ||
      ldu.map(_.io.pseudo_tag_error_inj_done).reduce(_|_)
  }

  // pseudo data ecc error
  if (outer.cacheCtrlOpt.nonEmpty && EnableDataEcc) {
    val ctrlUnit = outer.cacheCtrlOpt.head.module
    bankedDataArray.io.pseudo_error <> ctrlUnit.io_pseudoError(1)
    ctrlUnit.io_pseudoError(1).ready := bankedDataArray.io.pseudo_error.ready &&
      (mainPipe.io.pseudo_data_error_inj_done ||
      ldu.map(_.io.pseudo_data_error_inj_done).reduce(_|_))
  }

  val errors = ldu.map(_.io.error) ++ // load error
    Seq(mainPipe.io.error) // store / misc error
  val error_valid = errors.map(e => e.valid).reduce(_|_)
  io.error.bits <> RegEnable(
    Mux1H(errors.map(e => RegNext(e.valid) -> RegEnable(e.bits, e.valid))),
    RegNext(error_valid))
  io.error.valid := RegNext(RegNext(error_valid, init = false.B), init = false.B)

  //----------------------------------------
  // meta array
  val HybridLoadReadBase = LoadPipelineWidth - backendParams.HyuCnt
  val HybridStoreReadBase = StorePipelineWidth - backendParams.HyuCnt

  val hybrid_meta_read_ports = Wire(Vec(backendParams.HyuCnt, DecoupledIO(new MetaReadReq)))
  val hybrid_meta_resp_ports = Wire(Vec(backendParams.HyuCnt, ldu(0).io.meta_resp.cloneType))
  for (i <- 0 until backendParams.HyuCnt) {
    val HybridLoadMetaReadPort = HybridLoadReadBase + i
    val HybridStoreMetaReadPort = HybridStoreReadBase + i

    hybrid_meta_read_ports(i).valid := ldu(HybridLoadMetaReadPort).io.meta_read.valid ||
      (stu(HybridStoreMetaReadPort).io.meta_read.valid && StorePrefetchL1Enabled.B)
    hybrid_meta_read_ports(i).bits := Mux(ldu(HybridLoadMetaReadPort).io.meta_read.valid, ldu(HybridLoadMetaReadPort).io.meta_read.bits,
      stu(HybridStoreMetaReadPort).io.meta_read.bits)

    ldu(HybridLoadMetaReadPort).io.meta_read.ready := hybrid_meta_read_ports(i).ready
    stu(HybridStoreMetaReadPort).io.meta_read.ready := hybrid_meta_read_ports(i).ready && StorePrefetchL1Enabled.B

    ldu(HybridLoadMetaReadPort).io.meta_resp := hybrid_meta_resp_ports(i)
    stu(HybridStoreMetaReadPort).io.meta_resp := hybrid_meta_resp_ports(i)
  }

  // read / write coh meta
  val meta_read_ports = ldu.map(_.io.meta_read).take(HybridLoadReadBase) ++
    Seq(mainPipe.io.meta_read) ++
    stu.map(_.io.meta_read).take(HybridStoreReadBase) ++ hybrid_meta_read_ports

  val meta_resp_ports = ldu.map(_.io.meta_resp).take(HybridLoadReadBase) ++
    Seq(mainPipe.io.meta_resp) ++
    stu.map(_.io.meta_resp).take(HybridStoreReadBase) ++ hybrid_meta_resp_ports

  val meta_write_ports = Seq(
    mainPipe.io.meta_write
    // refillPipe.io.meta_write
  )
  if(StorePrefetchL1Enabled) {
    meta_read_ports.zip(metaArray.io.read).foreach { case (p, r) => r <> p }
    meta_resp_ports.zip(metaArray.io.resp).foreach { case (p, r) => p := r }
  } else {
    (meta_read_ports.take(HybridLoadReadBase + 1) ++
      meta_read_ports.takeRight(backendParams.HyuCnt)).zip(metaArray.io.read).foreach { case (p, r) => r <> p }
    (meta_resp_ports.take(HybridLoadReadBase + 1) ++
      meta_resp_ports.takeRight(backendParams.HyuCnt)).zip(metaArray.io.resp).foreach { case (p, r) => p := r }

    meta_read_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p.ready := false.B }
    meta_resp_ports.drop(HybridLoadReadBase + 1).take(HybridStoreReadBase).foreach { case p => p := 0.U.asTypeOf(p) }
  }
  meta_write_ports.zip(metaArray.io.write).foreach { case (p, w) => w <> p }

  // read extra meta (exclude stu)
  (meta_read_ports.take(HybridLoadReadBase + 1) ++
    meta_read_ports.takeRight(backendParams.HyuCnt)).zip(errorArray.io.read).foreach { case (p, r) => r <> p }
  (meta_read_ports.take(HybridLoadReadBase + 1) ++
    meta_read_ports.takeRight(backendParams.HyuCnt)).zip(prefetchArray.io.read).foreach { case (p, r) => r <> p }
  (meta_read_ports.take(HybridLoadReadBase + 1) ++
    meta_read_ports.takeRight(backendParams.HyuCnt)).zip(accessArray.io.read).foreach { case (p, r) => r <> p }
  val extra_meta_resp_ports = ldu.map(_.io.extra_meta_resp).take(HybridLoadReadBase) ++
    Seq(mainPipe.io.extra_meta_resp) ++
    ldu.map(_.io.extra_meta_resp).takeRight(backendParams.HyuCnt)
  extra_meta_resp_ports.zip(errorArray.io.resp).foreach { case (p, r) => {
    (0 until nWays).map(i => { p(i).error := r(i) })
  }}
  extra_meta_resp_ports.zip(prefetchArray.io.resp).foreach { case (p, r) => {
    (0 until nWays).map(i => { p(i).prefetch := r(i) })
  }}
  extra_meta_resp_ports.zip(accessArray.io.resp).foreach { case (p, r) => {
    (0 until nWays).map(i => { p(i).access := r(i) })
  }}

  if(LoadPrefetchL1Enabled) {
    // use the last port to read the prefetch and access flags
//    prefetchArray.io.read.last.valid := refillPipe.io.prefetch_flag_write.valid
//    prefetchArray.io.read.last.bits.idx := refillPipe.io.prefetch_flag_write.bits.idx
//    prefetchArray.io.read.last.bits.way_en := refillPipe.io.prefetch_flag_write.bits.way_en
//
//    accessArray.io.read.last.valid := refillPipe.io.prefetch_flag_write.valid
//    accessArray.io.read.last.bits.idx := refillPipe.io.prefetch_flag_write.bits.idx
//    accessArray.io.read.last.bits.way_en := refillPipe.io.prefetch_flag_write.bits.way_en
    prefetchArray.io.read.last.valid := mainPipe.io.prefetch_flag_write.valid
    prefetchArray.io.read.last.bits.idx := mainPipe.io.prefetch_flag_write.bits.idx
    prefetchArray.io.read.last.bits.way_en := mainPipe.io.prefetch_flag_write.bits.way_en

    accessArray.io.read.last.valid := mainPipe.io.prefetch_flag_write.valid
    accessArray.io.read.last.bits.idx := mainPipe.io.prefetch_flag_write.bits.idx
    accessArray.io.read.last.bits.way_en := mainPipe.io.prefetch_flag_write.bits.way_en

    val extra_flag_valid = RegNext(mainPipe.io.prefetch_flag_write.valid)
    val extra_flag_way_en = RegEnable(mainPipe.io.prefetch_flag_write.bits.way_en, mainPipe.io.prefetch_flag_write.valid)
    val extra_flag_prefetch = Mux1H(extra_flag_way_en, prefetchArray.io.resp.last)
    val extra_flag_access = Mux1H(extra_flag_way_en, accessArray.io.resp.last)

    prefetcherMonitor.io.validity.good_prefetch := extra_flag_valid && isPrefetchRelated(extra_flag_prefetch) && extra_flag_access
    prefetcherMonitor.io.validity.bad_prefetch := extra_flag_valid && isPrefetchRelated(extra_flag_prefetch) && !extra_flag_access
  }

  // write extra meta
  val error_flag_write_ports = Seq(
    mainPipe.io.error_flag_write // error flag generated by corrupted store
    // refillPipe.io.error_flag_write // corrupted signal from l2
  )
  error_flag_write_ports.zip(errorArray.io.write).foreach { case (p, w) => w <> p }

  val prefetch_flag_write_ports = ldu.map(_.io.prefetch_flag_write) ++ Seq(
    mainPipe.io.prefetch_flag_write // set prefetch_flag to false if coh is set to Nothing
    // refillPipe.io.prefetch_flag_write // refill required by prefetch will set prefetch_flag
  )
  prefetch_flag_write_ports.zip(prefetchArray.io.write).foreach { case (p, w) => w <> p }

  // FIXME: add hybrid unit?
  val same_cycle_update_pf_flag = ldu(0).io.prefetch_flag_write.valid && ldu(1).io.prefetch_flag_write.valid && (ldu(0).io.prefetch_flag_write.bits.idx === ldu(1).io.prefetch_flag_write.bits.idx) && (ldu(0).io.prefetch_flag_write.bits.way_en === ldu(1).io.prefetch_flag_write.bits.way_en)
  XSPerfAccumulate("same_cycle_update_pf_flag", same_cycle_update_pf_flag)

  val access_flag_write_ports = ldu.map(_.io.access_flag_write) ++ Seq(
    mainPipe.io.access_flag_write
    // refillPipe.io.access_flag_write
  )
  access_flag_write_ports.zip(accessArray.io.write).foreach { case (p, w) => w <> p }

  //----------------------------------------
  // tag array
  if(StorePrefetchL1Enabled) {
    require(tagArray.io.read.size == (LoadPipelineWidth + StorePipelineWidth - backendParams.HyuCnt + 1))
  } else {
    require(tagArray.io.read.size == (LoadPipelineWidth + 1))
  }
  // val tag_write_intend = missQueue.io.refill_pipe_req.valid || mainPipe.io.tag_write_intend
  val tag_write_intend = mainPipe.io.tag_write_intend
  assert(!RegNext(!tag_write_intend && tagArray.io.write.valid))
  ldu.take(HybridLoadReadBase).zipWithIndex.foreach {
    case (ld, i) =>
      tagArray.io.read(i) <> ld.io.tag_read
      ld.io.tag_resp := tagArray.io.resp(i)
      ld.io.tag_read.ready := !tag_write_intend
  }
  if(StorePrefetchL1Enabled) {
    stu.take(HybridStoreReadBase).zipWithIndex.foreach {
      case (st, i) =>
        tagArray.io.read(HybridLoadReadBase + i) <> st.io.tag_read
        st.io.tag_resp := tagArray.io.resp(HybridLoadReadBase + i)
        st.io.tag_read.ready := !tag_write_intend
    }
  } else {
    stu.foreach {
      case st =>
        st.io.tag_read.ready := false.B
        st.io.tag_resp := 0.U.asTypeOf(st.io.tag_resp)
    }
  }
  for (i <- 0 until backendParams.HyuCnt) {
    val HybridLoadTagReadPort = HybridLoadReadBase + i
    val HybridStoreTagReadPort = HybridStoreReadBase + i
    val TagReadPort =
      if (EnableStorePrefetchSPB)
        HybridLoadReadBase + HybridStoreReadBase + i
      else
        HybridLoadReadBase + i

    // read tag
    ldu(HybridLoadTagReadPort).io.tag_read.ready := false.B
    stu(HybridStoreTagReadPort).io.tag_read.ready := false.B

    if (StorePrefetchL1Enabled) {
      when (ldu(HybridLoadTagReadPort).io.tag_read.valid) {
        tagArray.io.read(TagReadPort) <> ldu(HybridLoadTagReadPort).io.tag_read
        ldu(HybridLoadTagReadPort).io.tag_read.ready := !tag_write_intend
      } .otherwise {
        tagArray.io.read(TagReadPort) <> stu(HybridStoreTagReadPort).io.tag_read
        stu(HybridStoreTagReadPort).io.tag_read.ready := !tag_write_intend
      }
    } else {
      tagArray.io.read(TagReadPort) <> ldu(HybridLoadTagReadPort).io.tag_read
      ldu(HybridLoadTagReadPort).io.tag_read.ready := !tag_write_intend
    }

    // tag resp
    ldu(HybridLoadTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort)
    stu(HybridStoreTagReadPort).io.tag_resp := tagArray.io.resp(TagReadPort)
  }
  tagArray.io.read.last <> mainPipe.io.tag_read
  mainPipe.io.tag_resp := tagArray.io.resp.last

  val fake_tag_read_conflict_this_cycle = PopCount(ldu.map(ld => ld.io.tag_read.valid))
  XSPerfAccumulate("fake_tag_read_conflict", fake_tag_read_conflict_this_cycle)

  val tag_write_arb = Module(new Arbiter(new TagWriteReq, 1))
  // tag_write_arb.io.in(0) <> refillPipe.io.tag_write
  tag_write_arb.io.in(0) <> mainPipe.io.tag_write
  tagArray.io.write <> tag_write_arb.io.out

  ldu.map(m => {
    m.io.vtag_update.valid := tagArray.io.write.valid
    m.io.vtag_update.bits := tagArray.io.write.bits
  })

  //----------------------------------------
  // data array
  mainPipe.io.data_read.zip(ldu).map(x => x._1 := x._2.io.lsu.req.valid)

  val dataWriteArb = Module(new Arbiter(new L1BankedDataWriteReq, 1))
  // dataWriteArb.io.in(0) <> refillPipe.io.data_write
  dataWriteArb.io.in(0) <> mainPipe.io.data_write

  bankedDataArray.io.write <> dataWriteArb.io.out

  for (bank <- 0 until DCacheBanks) {
    val dataWriteArb_dup = Module(new Arbiter(new L1BankedDataWriteReqCtrl, 1))
    // dataWriteArb_dup.io.in(0).valid := refillPipe.io.data_write_dup(bank).valid
    // dataWriteArb_dup.io.in(0).bits := refillPipe.io.data_write_dup(bank).bits
    dataWriteArb_dup.io.in(0).valid := mainPipe.io.data_write_dup(bank).valid
    dataWriteArb_dup.io.in(0).bits := mainPipe.io.data_write_dup(bank).bits

    bankedDataArray.io.write_dup(bank) <> dataWriteArb_dup.io.out
  }

  bankedDataArray.io.readline <> mainPipe.io.data_readline
  bankedDataArray.io.readline_intend := mainPipe.io.data_read_intend
  mainPipe.io.readline_error_delayed := bankedDataArray.io.readline_error_delayed
  mainPipe.io.data_resp := bankedDataArray.io.readline_resp

  (0 until LoadPipelineWidth).map(i => {
    bankedDataArray.io.read(i) <> ldu(i).io.banked_data_read
    bankedDataArray.io.is128Req(i) <> ldu(i).io.is128Req
    bankedDataArray.io.read_error_delayed(i) <> ldu(i).io.read_error_delayed

    ldu(i).io.banked_data_resp := bankedDataArray.io.read_resp(i)

    ldu(i).io.bank_conflict_slow := bankedDataArray.io.bank_conflict_slow(i)
  })
  val isKeyword = bus.d.bits.echo.lift(IsKeywordKey).getOrElse(false.B)
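  // Illustrative note: when L2 echoes IsKeyword = 1, the refill beats arrive
  // critical-word-first, i.e. in swapped order relative to ascending address.
  // XORing with `done` below compensates for that, so e.g. with isKeyword = 1
  // the first beat (done = 0) is reported to the load unit as the "last"
  // (higher-address) half of the line.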
  (0 until LoadPipelineWidth).map(i => {
    val (_, _, done, _) = edge.count(bus.d)
    when(bus.d.bits.opcode === TLMessages.GrantData) {
      io.lsu.forward_D(i).apply(bus.d.valid, bus.d.bits.data, bus.d.bits.source, isKeyword ^ done)
      // io.lsu.forward_D(i).apply(bus.d.valid, bus.d.bits.data, bus.d.bits.source, done)
    }.otherwise {
      io.lsu.forward_D(i).dontCare()
    }
  })
  // tl D channel wakeup
  val (_, _, done, _) = edge.count(bus.d)
  when (bus.d.bits.opcode === TLMessages.GrantData || bus.d.bits.opcode === TLMessages.Grant) {
    io.lsu.tl_d_channel.apply(bus.d.valid, bus.d.bits.data, bus.d.bits.source, done)
  } .otherwise {
    io.lsu.tl_d_channel.dontCare()
  }
  mainPipe.io.force_write <> io.force_write

  /** dwpu */
  if (dwpuParam.enWPU) {
    val dwpu = Module(new DCacheWpuWrapper(LoadPipelineWidth))
    for(i <- 0 until LoadPipelineWidth){
      dwpu.io.req(i) <> ldu(i).io.dwpu.req(0)
      dwpu.io.resp(i) <> ldu(i).io.dwpu.resp(0)
      dwpu.io.lookup_upd(i) <> ldu(i).io.dwpu.lookup_upd(0)
      dwpu.io.cfpred(i) <> ldu(i).io.dwpu.cfpred(0)
    }
    dwpu.io.tagwrite_upd.valid := tagArray.io.write.valid
    dwpu.io.tagwrite_upd.bits.vaddr := tagArray.io.write.bits.vaddr
    dwpu.io.tagwrite_upd.bits.s1_real_way_en := tagArray.io.write.bits.way_en
  } else {
    for(i <- 0 until LoadPipelineWidth){
      ldu(i).io.dwpu.req(0).ready := true.B
      ldu(i).io.dwpu.resp(0).valid := false.B
      ldu(i).io.dwpu.resp(0).bits := DontCare
    }
  }

  //----------------------------------------
  // load pipe
  // the s1 kill signal
  // only lsu uses this, replay never kills
  for (w <- 0 until LoadPipelineWidth) {
    ldu(w).io.lsu <> io.lsu.load(w)

    // TODO: when we have load128Req
    ldu(w).io.load128Req := io.lsu.load(w).is128Req

    // replay and nack not needed anymore
    // TODO: remove replay and nack
    ldu(w).io.nack := false.B

    ldu(w).io.disable_ld_fast_wakeup :=
      bankedDataArray.io.disable_ld_fast_wakeup(w) // load pipe fast wakeup should be disabled on bank conflict
  }

  prefetcherMonitor.io.timely.total_prefetch := ldu.map(_.io.prefetch_info.naive.total_prefetch).reduce(_ || _)
  prefetcherMonitor.io.timely.late_hit_prefetch := ldu.map(_.io.prefetch_info.naive.late_hit_prefetch).reduce(_ || _)
  prefetcherMonitor.io.timely.late_miss_prefetch := missQueue.io.prefetch_info.naive.late_miss_prefetch
  prefetcherMonitor.io.timely.prefetch_hit := PopCount(ldu.map(_.io.prefetch_info.naive.prefetch_hit))
  io.pf_ctrl <> prefetcherMonitor.io.pf_ctrl
  XSPerfAccumulate("useless_prefetch", ldu.map(_.io.prefetch_info.naive.total_prefetch).reduce(_ || _) && !(ldu.map(_.io.prefetch_info.naive.useful_prefetch).reduce(_ || _)))
  XSPerfAccumulate("useful_prefetch", ldu.map(_.io.prefetch_info.naive.useful_prefetch).reduce(_ || _))
  XSPerfAccumulate("late_prefetch_hit", ldu.map(_.io.prefetch_info.naive.late_prefetch_hit).reduce(_ || _))
  XSPerfAccumulate("late_load_hit", ldu.map(_.io.prefetch_info.naive.late_load_hit).reduce(_ || _))

  /** LoadMissDB: record load miss state */
  val hartId = p(XSCoreParamsKey).HartId
  val isWriteLoadMissTable = Constantin.createRecord(s"isWriteLoadMissTable$hartId")
  val isFirstHitWrite = Constantin.createRecord(s"isFirstHitWrite$hartId")
  val tableName = s"LoadMissDB$hartId"
  val siteName = s"DcacheWrapper$hartId"
  val loadMissTable = ChiselDB.createTable(tableName, new LoadMissEntry)
  for (i <- 0 until LoadPipelineWidth) {
    val loadMissEntry = Wire(new LoadMissEntry)
    val loadMissWriteEn =
      (!ldu(i).io.lsu.resp.bits.replay && ldu(i).io.miss_req.fire) ||
      (ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid && isFirstHitWrite.orR)
    loadMissEntry.timeCnt := GTimer()
    loadMissEntry.robIdx := ldu(i).io.lsu.resp.bits.debug_robIdx
    loadMissEntry.paddr := ldu(i).io.miss_req.bits.addr
    loadMissEntry.vaddr := ldu(i).io.miss_req.bits.vaddr
    loadMissEntry.missState := OHToUInt(Cat(Seq(
      ldu(i).io.miss_req.fire & ldu(i).io.miss_resp.merged,
      ldu(i).io.miss_req.fire & !ldu(i).io.miss_resp.merged,
      ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid
    )))
    loadMissTable.log(
      data = loadMissEntry,
      en = isWriteLoadMissTable.orR && loadMissWriteEn,
      site = siteName,
      clock = clock,
      reset = reset
    )
  }

  val isWriteLoadAccessTable = Constantin.createRecord(s"isWriteLoadAccessTable$hartId")
  val loadAccessTable = ChiselDB.createTable(s"LoadAccessDB$hartId", new LoadAccessEntry)
  for (i <- 0 until LoadPipelineWidth) {
    val loadAccessEntry = Wire(new LoadAccessEntry)
    loadAccessEntry.timeCnt := GTimer()
    loadAccessEntry.robIdx := ldu(i).io.lsu.resp.bits.debug_robIdx
    loadAccessEntry.paddr := ldu(i).io.miss_req.bits.addr
    loadAccessEntry.vaddr := ldu(i).io.miss_req.bits.vaddr
    loadAccessEntry.missState := OHToUInt(Cat(Seq(
      ldu(i).io.miss_req.fire & ldu(i).io.miss_resp.merged,
      ldu(i).io.miss_req.fire & !ldu(i).io.miss_resp.merged,
      ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid
    )))
    loadAccessEntry.pred_way_num := ldu(i).io.lsu.debug_s2_pred_way_num
    loadAccessEntry.real_way_num := ldu(i).io.lsu.debug_s2_real_way_num
    loadAccessEntry.dm_way_num := ldu(i).io.lsu.debug_s2_dm_way_num
    loadAccessTable.log(
      data = loadAccessEntry,
      en = isWriteLoadAccessTable.orR && ldu(i).io.lsu.resp.valid,
      site = siteName + "_loadpipe" + i.toString,
      clock = clock,
      reset = reset
    )
  }

  //----------------------------------------
  // Sta pipe
  for (w <- 0 until StorePipelineWidth) {
    stu(w).io.lsu <> io.lsu.sta(w)
  }

  //----------------------------------------
  // atomics
  // atomics not finished yet
  val atomic_resp_valid = mainPipe.io.atomic_resp.valid && mainPipe.io.atomic_resp.bits.isAMO
  io.lsu.atomics.resp.valid := RegNext(atomic_resp_valid)
  io.lsu.atomics.resp.bits := RegEnable(mainPipe.io.atomic_resp.bits, atomic_resp_valid)
  io.lsu.atomics.block_lr := mainPipe.io.block_lr

  // Request
  val missReqArb = Module(new TreeArbiter(new MissReq, MissReqPortCount))
  // separately generate miss queue enq ready for better timing
  val missReadyGen = Module(new MissReadyGen(MissReqPortCount))

  missReqArb.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req
  missReadyGen.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req
  for (w <- 0 until backendParams.LduCnt) {
    missReqArb.io.in(w + 1) <> ldu(w).io.miss_req
    missReadyGen.io.in(w + 1) <> ldu(w).io.miss_req
  }

  for (w <- 0 until LoadPipelineWidth) { ldu(w).io.miss_resp := missQueue.io.resp }
  mainPipe.io.miss_resp := missQueue.io.resp

  if(StorePrefetchL1Enabled) {
    for (w <- 0 until backendParams.StaCnt) {
      missReqArb.io.in(1 + backendParams.LduCnt + w) <> stu(w).io.miss_req
      missReadyGen.io.in(1 + backendParams.LduCnt + w) <> stu(w).io.miss_req
    }
  } else {
    for (w <- 0 until backendParams.StaCnt) { stu(w).io.miss_req.ready := false.B }
  }

  for (i <- 0 until backendParams.HyuCnt) {
    val HybridLoadReqPort = HybridLoadReadBase + i
    val HybridStoreReqPort = HybridStoreReadBase + i
    val HybridMissReqPort = HybridMissReqBase + i

    ldu(HybridLoadReqPort).io.miss_req.ready := false.B
    stu(HybridStoreReqPort).io.miss_req.ready := false.B

    if (StorePrefetchL1Enabled) {
      when (ldu(HybridLoadReqPort).io.miss_req.valid) {
        missReqArb.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req
        missReadyGen.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req
      } .otherwise {
        missReqArb.io.in(HybridMissReqPort) <> stu(HybridStoreReqPort).io.miss_req
        missReadyGen.io.in(HybridMissReqPort) <> stu(HybridStoreReqPort).io.miss_req
      }
    } else {
      missReqArb.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req
      missReadyGen.io.in(HybridMissReqPort) <> ldu(HybridLoadReqPort).io.miss_req
    }
  }

  for(w <- 0 until LoadPipelineWidth) {
    wb.io.miss_req_conflict_check(w) := ldu(w).io.wbq_conflict_check
    ldu(w).io.wbq_block_miss_req := wb.io.block_miss_req(w)
  }

  wb.io.miss_req_conflict_check(3) := mainPipe.io.wbq_conflict_check
  mainPipe.io.wbq_block_miss_req := wb.io.block_miss_req(3)

  wb.io.miss_req_conflict_check(4).valid := missReqArb.io.out.valid
  wb.io.miss_req_conflict_check(4).bits := missReqArb.io.out.bits.addr
  missQueue.io.wbq_block_miss_req := wb.io.block_miss_req(4)

  missReqArb.io.out <> missQueue.io.req
  missReadyGen.io.queryMQ <> missQueue.io.queryMQ
  io.cmoOpReq <> missQueue.io.cmo_req
  io.cmoOpResp <> missQueue.io.cmo_resp

  for (w <- 0 until LoadPipelineWidth) { ldu(w).io.mq_enq_cancel := missQueue.io.mq_enq_cancel }

  XSPerfAccumulate("miss_queue_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) >= 1.U)
  XSPerfAccumulate("miss_queue_muti_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) > 1.U)

  XSPerfAccumulate("miss_queue_has_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) >= 1.U)
  XSPerfAccumulate("miss_queue_has_muti_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U)
  XSPerfAccumulate("miss_queue_has_muti_enq_but_not_fire", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U && PopCount(VecInit(missReqArb.io.in.map(_.fire))) === 0.U)

  // forward missqueue
  (0 until LoadPipelineWidth).map(i => io.lsu.forward_mshr(i).connect(missQueue.io.forward(i)))

  // refill to load queue
  // io.lsu.lsq <> missQueue.io.refill_to_ldq

  // tilelink stuff
  bus.a <> missQueue.io.mem_acquire
  bus.e <> missQueue.io.mem_finish
  missQueue.io.probe_addr := bus.b.bits.address
  missQueue.io.replace_addr := mainPipe.io.replace_addr

  missQueue.io.main_pipe_resp.valid := RegNext(mainPipe.io.atomic_resp.valid)
  missQueue.io.main_pipe_resp.bits := RegEnable(mainPipe.io.atomic_resp.bits, mainPipe.io.atomic_resp.valid)

  //----------------------------------------
  // probe
  // probeQueue.io.mem_probe <> bus.b
  block_decoupled(bus.b, probeQueue.io.mem_probe, missQueue.io.probe_block)
  probeQueue.io.lrsc_locked_block <> mainPipe.io.lrsc_locked_block
  probeQueue.io.update_resv_set <> mainPipe.io.update_resv_set

  val refill_req =
  missReqArb.io.out <> missQueue.io.req
  missReadyGen.io.queryMQ <> missQueue.io.queryMQ
  io.cmoOpReq <> missQueue.io.cmo_req
  io.cmoOpResp <> missQueue.io.cmo_resp

  for (w <- 0 until LoadPipelineWidth) { ldu(w).io.mq_enq_cancel := missQueue.io.mq_enq_cancel }

  XSPerfAccumulate("miss_queue_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) >= 1.U)
  XSPerfAccumulate("miss_queue_muti_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) > 1.U)

  XSPerfAccumulate("miss_queue_has_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) >= 1.U)
  XSPerfAccumulate("miss_queue_has_muti_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U)
  XSPerfAccumulate("miss_queue_has_muti_enq_but_not_fire", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U && PopCount(VecInit(missReqArb.io.in.map(_.fire))) === 0.U)

  // forward from miss queue
  (0 until LoadPipelineWidth).foreach(i => io.lsu.forward_mshr(i).connect(missQueue.io.forward(i)))

  // refill to load queue
  // io.lsu.lsq <> missQueue.io.refill_to_ldq

  // tilelink stuff
  bus.a <> missQueue.io.mem_acquire
  bus.e <> missQueue.io.mem_finish
  missQueue.io.probe_addr := bus.b.bits.address
  missQueue.io.replace_addr := mainPipe.io.replace_addr

  missQueue.io.main_pipe_resp.valid := RegNext(mainPipe.io.atomic_resp.valid)
  missQueue.io.main_pipe_resp.bits := RegEnable(mainPipe.io.atomic_resp.bits, mainPipe.io.atomic_resp.valid)

  //----------------------------------------
  // probe
  // probeQueue.io.mem_probe <> bus.b
  block_decoupled(bus.b, probeQueue.io.mem_probe, missQueue.io.probe_block)
  probeQueue.io.lrsc_locked_block <> mainPipe.io.lrsc_locked_block
  probeQueue.io.update_resv_set <> mainPipe.io.update_resv_set

  val refill_req = RegNext(missQueue.io.main_pipe_req.valid &&
    (missQueue.io.main_pipe_req.bits.isLoad || missQueue.io.main_pipe_req.bits.isStore))

  //----------------------------------------
  // mainPipe
  // when a req enters main pipe, if it is set-conflict with replace pipe or refill pipe,
  // block the req in main pipe
  probeQueue.io.pipe_req <> mainPipe.io.probe_req
  io.lsu.store.req <> mainPipe.io.store_req

  io.lsu.store.replay_resp.valid := RegNext(mainPipe.io.store_replay_resp.valid)
  io.lsu.store.replay_resp.bits := RegEnable(mainPipe.io.store_replay_resp.bits, mainPipe.io.store_replay_resp.valid)
  io.lsu.store.main_pipe_hit_resp := mainPipe.io.store_hit_resp

  mainPipe.io.atomic_req <> io.lsu.atomics.req

  mainPipe.io.invalid_resv_set := RegNext(
    wb.io.req.fire &&
    wb.io.req.bits.addr === mainPipe.io.lrsc_locked_block.bits &&
    mainPipe.io.lrsc_locked_block.valid
  )

  //----------------------------------------
  // replace (main pipe)
  val mpStatus = mainPipe.io.status
  mainPipe.io.refill_req <> missQueue.io.main_pipe_req

  mainPipe.io.data_write_ready_dup := VecInit(Seq.fill(nDupDataWriteReady)(true.B))
  mainPipe.io.tag_write_ready_dup := VecInit(Seq.fill(nDupDataWriteReady)(true.B))
  mainPipe.io.wb_ready_dup := wb.io.req_ready_dup

  //----------------------------------------
  // wb
  // add a queue between MainPipe and WritebackUnit to reduce MainPipe stalls due to WritebackUnit busy
  wb.io.req <> mainPipe.io.wb
  bus.c <> wb.io.mem_release
  // wb.io.release_wakeup := refillPipe.io.release_wakeup
  // wb.io.release_update := mainPipe.io.release_update
  // wb.io.probe_ttob_check_req <> mainPipe.io.probe_ttob_check_req
  // wb.io.probe_ttob_check_resp <> mainPipe.io.probe_ttob_check_resp

  io.lsu.release.valid := RegNext(wb.io.req.fire)
  io.lsu.release.bits.paddr := RegEnable(wb.io.req.bits.addr, wb.io.req.fire)
  // Note: RegNext() is required by:
  // * load queue released flag update logic
  // * load / load violation check logic
  // * and timing requirements
  // CHANGE IT WITH CARE

  // connect bus d
  missQueue.io.mem_grant.valid := false.B
  missQueue.io.mem_grant.bits := DontCare

  wb.io.mem_grant.valid := false.B
  wb.io.mem_grant.bits := DontCare

  // in L1 DCache, we only expect Grant[Data], ReleaseAck and CBOAck
  bus.d.ready := false.B
  when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData || bus.d.bits.opcode === TLMessages.CBOAck) {
    missQueue.io.mem_grant <> bus.d
  } .elsewhen (bus.d.bits.opcode === TLMessages.ReleaseAck) {
    wb.io.mem_grant <> bus.d
  } .otherwise {
    assert(!bus.d.fire)
  }
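  // The when/elsewhen chain above is a demux on the TileLink D channel:
  // Grant/GrantData/CBOAck beats go to the miss queue (which issued the
  // corresponding Acquire/CBO), while ReleaseAck beats go to the writeback
  // unit. The bulk connect inside each when() overrides the defaults assigned
  // just before it, including bus.d.ready. A roughly equivalent explicit form
  // (sketch only, bits connections omitted):
  //   val toMissQueue = bus.d.bits.opcode === TLMessages.Grant ||
  //     bus.d.bits.opcode === TLMessages.GrantData ||
  //     bus.d.bits.opcode === TLMessages.CBOAck
  //   val isReleaseAck = bus.d.bits.opcode === TLMessages.ReleaseAck
  //   missQueue.io.mem_grant.valid := bus.d.valid && toMissQueue
  //   wb.io.mem_grant.valid := bus.d.valid && isReleaseAck
  //   bus.d.ready := Mux(toMissQueue, missQueue.io.mem_grant.ready,
  //     Mux(isReleaseAck, wb.io.mem_grant.ready, false.B))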
  //----------------------------------------
  // Feedback Direct Prefetch Monitor
  fdpMonitor.io.refill := missQueue.io.prefetch_info.fdp.prefetch_monitor_cnt
  fdpMonitor.io.timely.late_prefetch := missQueue.io.prefetch_info.fdp.late_miss_prefetch
  fdpMonitor.io.accuracy.total_prefetch := missQueue.io.prefetch_info.fdp.total_prefetch
  for (w <- 0 until LoadPipelineWidth) {
    if (w == 0) {
      fdpMonitor.io.accuracy.useful_prefetch(w) := ldu(w).io.prefetch_info.fdp.useful_prefetch
    } else {
      fdpMonitor.io.accuracy.useful_prefetch(w) := Mux(same_cycle_update_pf_flag, false.B, ldu(w).io.prefetch_info.fdp.useful_prefetch)
    }
  }
  for (w <- 0 until LoadPipelineWidth) { fdpMonitor.io.pollution.cache_pollution(w) := ldu(w).io.prefetch_info.fdp.pollution }
  for (w <- 0 until LoadPipelineWidth) { fdpMonitor.io.pollution.demand_miss(w) := ldu(w).io.prefetch_info.fdp.demand_miss }
  fdpMonitor.io.debugRolling := io.debugRolling

  //----------------------------------------
  // Bloom Filter
  // bloomFilter.io.set <> missQueue.io.bloom_filter_query.set
  // bloomFilter.io.clr <> missQueue.io.bloom_filter_query.clr
  bloomFilter.io.set <> mainPipe.io.bloom_filter_query.set
  bloomFilter.io.clr <> mainPipe.io.bloom_filter_query.clr

  for (w <- 0 until LoadPipelineWidth) { bloomFilter.io.query(w) <> ldu(w).io.bloom_filter_query.query }
  for (w <- 0 until LoadPipelineWidth) { bloomFilter.io.resp(w) <> ldu(w).io.bloom_filter_query.resp }

  for (w <- 0 until LoadPipelineWidth) { counterFilter.io.ld_in(w) <> ldu(w).io.counter_filter_enq }
  for (w <- 0 until LoadPipelineWidth) { counterFilter.io.query(w) <> ldu(w).io.counter_filter_query }

  //----------------------------------------
  // replacement algorithm
  val replacer = ReplacementPolicy.fromString(cacheParams.replacer, nWays, nSets)
  val replWayReqs = ldu.map(_.io.replace_way) ++ Seq(mainPipe.io.replace_way) ++ stu.map(_.io.replace_way)

  if (dwpuParam.enCfPred) {
    val victimList = VictimList(nSets)
    replWayReqs.foreach { req =>
      req.way := DontCare
      when (req.set.valid) {
        when (victimList.whether_sa(req.set.bits)) {
          req.way := replacer.way(req.set.bits)
        } .otherwise {
          req.way := req.dmWay
        }
      }
    }
  } else {
    replWayReqs.foreach { req =>
      req.way := DontCare
      when (req.set.valid) {
        req.way := replacer.way(req.set.bits)
      }
    }
  }

  val replAccessReqs = ldu.map(_.io.replace_access) ++ Seq(
    mainPipe.io.replace_access
  ) ++ stu.map(_.io.replace_access)
  val touchWays = Seq.fill(replAccessReqs.size)(Wire(ValidIO(UInt(log2Up(nWays).W))))
  touchWays.zip(replAccessReqs).foreach { case (w, req) =>
    w.valid := req.valid
    w.bits := req.bits.way
  }
  val touchSets = replAccessReqs.map(_.bits.set)
  replacer.access(touchSets, touchWays)
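  // Replacement bookkeeping: every load pipe, the main pipe and every store
  // pipe can both ask for a victim way (replace_way) and report an access
  // (replace_access); replacer.access() applies all touches in a single call,
  // so the replacement state sees hits from all pipes. With confidence-based
  // prediction enabled (dwpuParam.enCfPred), sets not sampled by VictimList
  // fall back to the direct-mapped way guess (req.dmWay) instead of the
  // replacer's choice.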
  //----------------------------------------
  // assertions
  // dcache should only deal with DRAM addresses
  import freechips.rocketchip.util._
  when (bus.a.fire) {
    assert(PmemRanges.map(range => bus.a.bits.address.inRange(range._1.U, range._2.U)).reduce(_ || _))
  }
  when (bus.b.fire) {
    assert(PmemRanges.map(range => bus.b.bits.address.inRange(range._1.U, range._2.U)).reduce(_ || _))
  }
  when (bus.c.fire) {
    assert(PmemRanges.map(range => bus.c.bits.address.inRange(range._1.U, range._2.U)).reduce(_ || _))
  }

  //----------------------------------------
  // utility functions
  // mask both valid and ready, so a blocked beat is neither seen by the sink
  // nor acknowledged to the source
  def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
    sink.valid := source.valid && !block_signal
    source.ready := sink.ready && !block_signal
    sink.bits := source.bits
  }

  //----------------------------------------
  // Customized csr cache op support
  val cacheOpDecoder = Module(new CSRCacheOpDecoder("dcache", CacheInstrucion.COP_ID_DCACHE))
  cacheOpDecoder.io.csr <> io.csr
  bankedDataArray.io.cacheOp.req := cacheOpDecoder.io.cache.req
  // dup cacheOp_req_valid
  bankedDataArray.io.cacheOp_req_dup.zipWithIndex.foreach { case (dup, i) => dup := cacheOpDecoder.io.cache_req_dup(i) }
  // dup cacheOp_req_bits_opCode
  bankedDataArray.io.cacheOp_req_bits_opCode_dup.zipWithIndex.foreach { case (dup, i) => dup := cacheOpDecoder.io.cacheOp_req_bits_opCode_dup(i) }

  tagArray.io.cacheOp.req := cacheOpDecoder.io.cache.req
  // dup cacheOp_req_valid
  tagArray.io.cacheOp_req_dup.zipWithIndex.foreach { case (dup, i) => dup := cacheOpDecoder.io.cache_req_dup(i) }
  // dup cacheOp_req_bits_opCode
  tagArray.io.cacheOp_req_bits_opCode_dup.zipWithIndex.foreach { case (dup, i) => dup := cacheOpDecoder.io.cacheOp_req_bits_opCode_dup(i) }

  cacheOpDecoder.io.cache.resp.valid := bankedDataArray.io.cacheOp.resp.valid ||
    tagArray.io.cacheOp.resp.valid
  cacheOpDecoder.io.cache.resp.bits := Mux1H(List(
    bankedDataArray.io.cacheOp.resp.valid -> bankedDataArray.io.cacheOp.resp.bits,
    tagArray.io.cacheOp.resp.valid -> tagArray.io.cacheOp.resp.bits,
  ))
  cacheOpDecoder.io.error := io.error
  // at most one of the two arrays may respond in a cycle, otherwise Mux1H above is invalid
  assert(!((bankedDataArray.io.cacheOp.resp.valid +& tagArray.io.cacheOp.resp.valid) > 1.U))

  //----------------------------------------
  // performance counters
  val num_loads = PopCount(ldu.map(e => e.io.lsu.req.fire))
  XSPerfAccumulate("num_loads", num_loads)

  io.mshrFull := missQueue.io.full

  // performance counter
  // val ld_access = Wire(Vec(LoadPipelineWidth, missQueue.io.debug_early_replace.last.cloneType))
  // val st_access = Wire(ld_access.last.cloneType)
  // ld_access.zip(ldu).foreach {
  //   case (a, u) =>
  //     a.valid := RegNext(u.io.lsu.req.fire) && !u.io.lsu.s1_kill
  //     a.bits.idx := RegEnable(get_idx(u.io.lsu.req.bits.vaddr), u.io.lsu.req.fire)
  //     a.bits.tag := get_tag(u.io.lsu.s1_paddr_dup_dcache)
  // }
  // st_access.valid := RegNext(mainPipe.io.store_req.fire)
  // st_access.bits.idx := RegEnable(get_idx(mainPipe.io.store_req.bits.vaddr), mainPipe.io.store_req.fire)
  // st_access.bits.tag := RegEnable(get_tag(mainPipe.io.store_req.bits.addr), mainPipe.io.store_req.fire)
  // val access_info = ld_access.toSeq ++ Seq(st_access)
  // val early_replace = RegNext(missQueue.io.debug_early_replace) // TODO: clock gate
  // val access_early_replace = access_info.map {
  //   case acc =>
  //     Cat(early_replace.map {
  //       case r =>
  //         acc.valid && r.valid &&
  //         acc.bits.tag === r.bits.tag &&
  //         acc.bits.idx === r.bits.idx
  //     })
  // }
  // XSPerfAccumulate("access_early_replace", PopCount(Cat(access_early_replace)))

  val perfEvents = (Seq(wb, mainPipe, missQueue, probeQueue) ++ ldu).flatMap(_.getPerfEvents)
  generatePerfEvent()
}

class AMOHelper() extends ExtModule {
  val clock  = IO(Input(Clock()))
  val enable = IO(Input(Bool()))
  val cmd    = IO(Input(UInt(5.W)))
  val addr   = IO(Input(UInt(64.W)))
  val wdata  = IO(Input(UInt(64.W)))
  val mask   = IO(Input(UInt(8.W)))
  val rdata  = IO(Output(UInt(64.W)))
}
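// AMOHelper is an ExtModule: Chisel elaboration emits only an instantiation,
// and a matching external (typically DPI-C backed) Verilog model must be
// supplied at simulation time. Expected module interface, sketched from the
// IOs declared above (the model's behavior is simulation-specific):
//   module AMOHelper(
//     input         clock,
//     input         enable,
//     input  [4:0]  cmd,
//     input  [63:0] addr,
//     input  [63:0] wdata,
//     input  [7:0]  mask,
//     output [63:0] rdata
//   );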
class DCacheWrapper()(implicit p: Parameters) extends LazyModule
  with HasXSParameter
  with HasDCacheParameters
{
  override def shouldBeInlined: Boolean = false

  val useDcache = coreParams.dcacheParametersOpt.nonEmpty
  val clientNode = if (useDcache) TLIdentityNode() else null
  val dcache = if (useDcache) LazyModule(new DCache()) else null
  if (useDcache) {
    clientNode := dcache.clientNode
  }
  val uncacheNode = OptionWrapper(cacheCtrlParamsOpt.isDefined, TLIdentityNode())
  require(
    uncacheNode.isDefined == dcache.cacheCtrlOpt.isDefined,
    "uncacheNode and cacheCtrlOpt must be defined (and connected) together!"
  )
  if (uncacheNode.isDefined && dcache.cacheCtrlOpt.isDefined) {
    dcache.cacheCtrlOpt.get.node := uncacheNode.get
  }

  class DCacheWrapperImp(wrapper: LazyModule) extends LazyModuleImp(wrapper) with HasPerfEvents {
    val io = IO(new DCacheIO)
    val perfEvents = if (!useDcache) {
      // a fake dcache that uses DPI-C to access memory; for debug use only!
      val fake_dcache = Module(new FakeDCache())
      io <> fake_dcache.io
      Seq()
    } else {
      io <> dcache.module.io
      dcache.module.getPerfEvents
    }
    generatePerfEvent()
  }

  lazy val module = new DCacheWrapperImp(this)
}
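// Usage sketch (hypothetical parent module, for illustration only): the
// wrapper is instantiated as a LazyModule and its clientNode is attached to
// the L2 side during diplomacy, e.g.
//   val dcache = LazyModule(new DCacheWrapper())
//   l2xbar := TLBuffer() := dcache.clientNode
// When coreParams.dcacheParametersOpt is empty, the imp falls back to
// FakeDCache, a DPI-C memory model, so the rest of the core still elaborates.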