/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
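// A ReleaseUpdate carries the block address, the per-bank store mask and the
// full-block data written by the main pipe, so that a sleeping entry can merge
// it into the data it is about to release.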
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr

  val busy = remain.orR

  val req = Reg(new WritebackReq)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform this entry that it can keep releasing
  when (state === s_sleep) {
    assert (remain === 0.U)

    // merge store data written by the main pipe while this entry sleeps
    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

    // merge an incoming probe req (see secondary_ready) with this pending release
    when (io.req.valid && io.secondary_ready) {
      state := s_release_req
      req.voluntary := false.B
      req.param := req.param // keep the original param unchanged
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      remain_set := Mux(req.hasData || update && io.release_update.bits.mask.orR, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }

  // --------------------------------------------------------------------------------
  // s_release_resp: receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free (primary_ready)
  // 2. When this entry is waiting for the release_wakeup signal and a probe req with
  //    the same addr comes (secondary_ready). In this case we merge the probe with the
  //    pending release and handle the probe here, so we don't need another release.
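  // Note that only probe (non-voluntary) reqs for the same block can be merged into a
  // sleeping entry; a new voluntary release always allocates its own entry.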
  io.primary_ready := state === s_invalid
  io.secondary_ready := state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  // delay writeback req by one cycle
  val DelayWritebackReq = true
  val req_delayed = Wire(Flipped(DecoupledIO(new WritebackReq)))
  val req_delayed_valid = RegInit(false.B)
  val req_delayed_bits = Reg(io.req.bits.cloneType)
  req_delayed.valid := req_delayed_valid
  req_delayed.bits := req_delayed_bits
  when (req_delayed.fire()) {
    req_delayed_valid := false.B
  }
  // We delay writeback queue enq by 1 cycle, so a missQ req does not depend on
  // wbQ enqueue. As a result, a missQ req may be blocked by the req held in
  // req_delayed. When the grant (release_wakeup) comes, that req should also be updated.
  when (
    req_delayed_valid &&
    io.release_wakeup.valid &&
    io.release_wakeup.bits === req_delayed_bits.miss_id
  ) {
    // TODO: it is dirty
    req_delayed_bits.delay_release := false.B // update pipe reg
    req_delayed.bits.delay_release := false.B // update entry write req in current cycle
  }
  when (io.req.fire()) {
    req_delayed_valid := true.B
    req_delayed_bits := io.req.bits
  }
  io.req.ready := !req_delayed_valid || req_delayed.fire()
  dontTouch(req_delayed)

  // allocate a free entry for the incoming request
  val block_conflict = Wire(Bool())
  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge = Cat(secondary_ready_vec).orR
  val alloc = !merge && Cat(primary_ready_vec).orR && !block_conflict
  // Now we block a new release until the last release of that block is finished
  // TODO: Is it possible to merge these release reqs?
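  // An incoming req is accepted only when it can be merged into a sleeping entry
  // (merge) or when a free entry exists and no entry currently holds the same block
  // (alloc, gated by block_conflict).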
  val req = req_delayed
  val accept = merge || alloc
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := req.valid
      primary_ready_vec(i) := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits := req.bits

      // only the lowest-indexed free entry may take a newly allocated req
      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits := io.mem_grant.bits
      when (entry_id === grant_source) {
        io.mem_grant.ready := entry.io.mem_grant.ready
      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === req.bits.addr)).asUInt.orR
  val miss_req_conflict = if (DelayWritebackReq)
    req.bits.addr === io.miss_req.bits && req.valid ||
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  else
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release): _*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (req.fire()) {
    req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", req.fire())

  val perfinfo = IO(new Bundle() {
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val perfEvents = Seq(
    ("dcache_wbq_req      ", req.fire()),
    ("dcache_wbq_1/4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2/4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3/4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4/4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U*3.U/4.U)))
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}