/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import org.chipsalliance.cde.config.Parameters
import utils.HasTLDump
import utility.{XSDebug, XSPerfAccumulate, HasPerfEvents}

class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val corrupt = Bool()
  val dirty = Bool()

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}

class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}

class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  def toWritebackReqWodata(): WritebackReqWodata = {
    val out = Wire(new WritebackReqWodata)
    out.addr := addr
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.corrupt := corrupt
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val out = Wire(new WritebackReqCtrl)
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.corrupt := corrupt
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqData(): WritebackReqData = {
    val out = Wire(new WritebackReqData)
    out.data := data
    out
  }
}
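
// NOTE: the request is deliberately split into a control-only view
// (WritebackReqCtrl / WritebackReqWodata) and a data-only view
// (WritebackReqData). The wide (cfg.blockBytes * 8)-bit data is registered
// one cycle later than the control fields in WritebackQueue (see the
// RegEnable on req_data below), presumably to keep the request's fanout
// into the wide data registers off the allocation path.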
// While a Release sleeps and waits for a refill to wake it up, the main pipe
// might update the meta & data during this time, so the meta & data to be
// released need to be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
  val mask_orr = Bool()
}

// When a probe TtoB request enters the dcache main pipe, check whether that
// cacheline is waiting for release. If so, change TtoB to TtoN and set the
// dcache coherence state to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}

class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val primary_valid = Input(Bool())
    val primary_ready = Output(Bool())
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))

    val block_addr = Output(Valid(UInt()))
  })

  val s_invalid :: s_release_req :: s_release_resp :: Nil = Enum(3)
  // ProbeAck:               s_invalid -> s_release_req
  // ProbeAck merge Release: s_invalid -> s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                         (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                         (send a ProbeAck after the Release transaction is over)

  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid))) // TODO: clock gate

  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  //val s_data_merge = RegInit(true.B)
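
  // Beat bookkeeping: `remain` keeps one bit per pending beat of the block.
  // Allocation sets either all refillCycles bits (messages with data) or a
  // single bit (dataless messages, which still occupy one "beat" so the
  // entry stays busy until the message fires); each fired mem_release beat
  // clears the lowest pending bit via PriorityEncoderOH.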
  // there is a valid request that can be sent to the release bus
  //val busy = remain.orR && s_data_override && s_data_merge // have remaining beats and data write finished
  val busy = remain.orR && s_data_override // have remaining beats and data write finished

  val req = Reg(new WritebackReqWodata)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  s_data_override := true.B // data_override takes only 1 cycle
  //s_data_merge := true.B // data_merge takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  io.req.ready := state === s_invalid
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    s_data_override := false.B
    // only update paddr when allocating a new writeback queue entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr

    remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    state := s_release_req
    state_dup_0 := s_release_req
    state_dup_1 := s_release_req
    state_dup_for_mp.foreach(_ := s_release_req)
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )
  probeResponse.corrupt := req.corrupt

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat),
    corrupt = req.corrupt
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2
  voluntaryRelease.corrupt := req.corrupt

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat),
    corrupt = req.corrupt
  )._2

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire) { remain_clr := PriorityEncoderOH(remain_dup_1) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
    when (req.voluntary) {
      state_dup_for_mp.foreach(_ := s_release_resp)
    } .otherwise {
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }
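
  // Only a voluntary Release is answered with a ReleaseAck on channel D;
  // a ProbeAck gets no acknowledgement, so a probe entry frees itself as
  // soon as its last beat fires, while Release entries wait in
  // s_release_resp for the grant below.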
  io.primary_ready := state === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire) {
      state := s_invalid
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  // data update logic
  when (!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }

  // assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    //val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    //val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    // 5 miss_req to check: 3*LoadPipe + 1*MainPipe + 1*missReqArb_out
    val miss_req_conflict_check = Vec(LoadPipelineWidth + 2, Flipped(Valid(UInt())))
    val block_miss_req = Vec(LoadPipelineWidth + 2, Output(Bool()))
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val alloc = Cat(primary_ready_vec).orR

  val req = io.req
  val block_conflict = Wire(Bool())

  req.ready := alloc && !block_conflict

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  // delay the data write in a writeback req by 1 cycle
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := req.valid && !block_conflict
      primary_ready_vec(i) := entry.io.primary_ready
      entry.io.req.bits := req.bits
      entry.io.req_data := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits := io.mem_grant.bits
      //when (i.U === io.mem_grant.bits.source) {
      //  io.mem_grant.ready := entry.io.mem_grant.ready
      //}
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR && !block_conflict
  }

  io.mem_grant.ready := true.B
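
  // Conflict checks: a new writeback request whose address matches an
  // in-flight entry is stalled via block_conflict, and a miss request to an
  // address that is still being written back is flagged on block_miss_req,
  // so a refill cannot race with a pending release of the same cacheline.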
  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  val miss_req_conflict = io.miss_req_conflict_check.map { r =>
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === r.bits)).asUInt.orR
  }
  io.block_miss_req.zipWithIndex.foreach { case (blk, i) =>
    blk := io.miss_req_conflict_check(i).valid && miss_req_conflict(i)
  }

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release): _*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire) {
    io.mem_release.bits.dump
  }

  // when (io.miss_req.valid) {
  //   XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  // }

  // when (io.block_miss_req) {
  //   XSDebug("block_miss_req\n")
  // }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U / 4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U / 4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U / 2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U / 2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U * 3.U / 4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U * 3.U / 4.U)))
  )
  generatePerfEvent()
}
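
// Rough usage sketch (hypothetical, for illustration only): `clientEdge`
// stands for the TLEdgeOut of the dcache client node and `wb_source` for
// whichever pipeline produces writeback requests; neither name exists in
// this file, and the conflict-check and dup-ready ports are omitted.
//
//   val wbq = Module(new WritebackQueue(clientEdge))
//   wbq.io.req <> wb_source.io.wb_req   // WritebackReq producer (hypothetical)
//   tl_out.c <> wbq.io.mem_release      // Release / ProbeAck on channel C
//   wbq.io.mem_grant <> tl_out.d        // ReleaseAck returned on channel D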