/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import freechips.rocketchip.tilelink.TLPermissions._
import huancun.{DirtyField, DirtyKey}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
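// The update carries the store's bank write mask and full-block data; only the
// masked 64-bit banks overwrite the data held by the sleeping entry
// (see WritebackEntry.mergeData below).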
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for a new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:                s_invalid -> s_release_req
  // ProbeAck merge Release:  s_invalid -> s_release_req
  // Release:                 s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck:  s_invalid -> s_sleep -> s_release_req
  //                          (change the Release into a ProbeAck when the Release has not been fired yet)
  //                      or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                          (send a ProbeAck after the Release transaction is over)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr

  val busy = remain.orR

  val req = Reg(new WritebackReq)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform us that we can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

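    // While sleeping, the main pipe may still write this block (a store hit).
    // Such a write arrives through release_update and is folded into the pending
    // release data; it also marks the request as carrying data / being dirty.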
    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }.elsewhen (merge) {
      state := s_release_req
      req.voluntary := false.B
      req.param := req.param
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask.orR || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }

  // Because the wbq now merges a same-addr req unconditionally, a req to be merged may arrive
  // too late to be folded into the req that is currently being sent. In that case we have to
  // handle the new req later, after the previous one finishes.
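  // release_later marks that such a deferred req is pending, req_later buffers its fields
  // (the block data is taken from req.data when it is replayed), and c_already_sent records
  // that a beat of the current transaction has already left on channel C, so the new req can
  // no longer be merged in place.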
  // TODO: initialize these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReq = {
      val r = Wire(new WritebackReq())
      r.data := req.data
      r.addr := req.addr
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())

  when (state === s_release_req) {
    when (io.mem_release.fire()) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is a Release
      when (release_done) {
        state := s_release_resp
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire() || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // the Release hasn't been sent out yet, change the Release into a ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          req.data := Mux(
            io.req.bits.hasData,
            io.req.bits.data,
            req.data
          )
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is a ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after the ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReq)
//          release_later := false.B
          state := s_sleep
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          req := req_later.toWritebackReq
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

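  // A ProbeAck merged while the ReleaseAck is still outstanding (or a req deferred in
  // req_later) is replayed as another s_release_req pass once the ReleaseAck has arrived.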
  // --------------------------------------------------------------------------------
  // s_release_resp: receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire()) {
      when (merge) {
        state := s_release_req
        req := io.req.bits
        release_later := false.B
      }.elsewhen (release_later) {
        state := s_release_req
        req := req_later.toWritebackReq
        release_later := false.B
      }.otherwise {
        state := s_invalid
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free, it can be allocated to the new req.
  // 2. When this entry wants to release but is still waiting for the release_wakeup signal,
  //    and a probe req with the same addr comes. In this case we merge the probe with the
  //    release and handle the probe, so we don't need another release.
  io.primary_ready := state === s_invalid
  io.secondary_ready := state =/= s_invalid && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there are empty entries, merge the req or allocate a new entry for it.
  // When there is no empty entry, reject it even if it could be merged.
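  // Note that merging is also gated by accept: secondary_valid (assigned below) is only
  // asserted when some entry is free, so a mergeable req without a free entry is not accepted.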
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i) := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits := io.req.bits

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits := io.mem_grant.bits
//      when (entry_id === grant_source) {
//        io.mem_grant.ready := entry.io.mem_grant.ready
//      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }
  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release): _*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfinfo = IO(new Bundle() {
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val perfEvents = Seq(
    ("dcache_wbq_req ", io.req.fire()),
    ("dcache_wbq_1/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U*3.U/4.U))),
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}