/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// the main pipe might update the block's meta & data during this time.
// So the meta & data to be released need to be updated too.
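// Example (a sketch of the intended flow): a voluntary Release for block A is
// put to sleep because the refill it depends on has not finished yet; while it
// sleeps, a store to block A retires through the main pipe. The main pipe then
// sends a ReleaseUpdate so that the writeback eventually sent on channel C
// still carries that store's bytes.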
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReq))
    val merge = Output(Bool())
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr

  val busy = remain.orR

  val req = Reg(new WritebackReq)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  // merge new data into the old block: each bit of wmask selects one 64-bit
  // bank, taking that bank from new_data instead of old_data
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.fire()) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform me that I can keep releasing
  val merge_probe = WireInit(false.B)
  io.merge := false.B
  when (state === s_sleep) {
    assert (remain === 0.U)

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

    io.merge := !io.req.bits.voluntary && io.req.bits.addr === req.addr
    merge_probe := io.req.valid && io.merge
    when (merge_probe) {
      state := s_release_req
      req.voluntary := false.B
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
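      // a release_update may have merged store data in while this entry was
      // asleep; if so, the block now carries data and all refillCycles beats
      // must be scheduled, otherwise a single data-less beat is enough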
      remain_set := Mux(req.hasData || (update && io.release_update.bits.mask.orR), ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  // tell the next cache level whether the released block is dirty
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free (s_invalid)
  // 2. When this entry is sleeping, still waiting for the release_wakeup signal,
  //    and a probe req with the same addr comes in. In this case we merge the probe
  //    with the release: handling this probe means we don't need another release.
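  // Note on case 2: merging turns this entry's pending voluntary Release into
  // a ProbeAck / ProbeAckData (req.voluntary is cleared when merging), so no
  // ReleaseAck is expected and the entry frees itself once all beats are sent.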
  io.req.ready := state === s_invalid ||
    (state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr)

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  // allocate a free entry for the incoming request
  val primary_ready = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val merge = merge_vec.asUInt.orR
  val alloc_idx = PriorityEncoder(Mux(merge, merge_vec, primary_ready))

  // delay the writeback req by one cycle
  val DelayWritebackReq = true
  val req_delayed = Wire(Flipped(DecoupledIO(new WritebackReq)))
  val req_delayed_valid = RegInit(false.B)
  val req_delayed_bits = Reg(io.req.bits.cloneType)
  req_delayed.valid := req_delayed_valid
  req_delayed.bits := req_delayed_bits
  when (req_delayed.fire()) {
    req_delayed_valid := false.B
  }
  // We delay the writeback queue enq by 1 cycle, so the missQ req does not
  // depend on the wbQ enqueue. As a result, a req may be blocked in
  // req_delayed. When its wakeup comes, that req should also be updated.
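  // i.e. clear delay_release in both the holding register and the request
  // forwarded to the entry in the current cycle, so that neither copy keeps
  // sleeping on a wakeup that has already fired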
  when (
    req_delayed_valid &&
    io.release_wakeup.valid &&
    io.release_wakeup.bits === req_delayed_bits.miss_id
  ) {
    // TODO: it is dirty
    req_delayed_bits.delay_release := false.B // update pipe reg
    req_delayed.bits.delay_release := false.B // update entry write req in current cycle
  }
  when (io.req.fire()) {
    req_delayed_valid := true.B
    req_delayed_bits := io.req.bits
  }
  io.req.ready := !req_delayed_valid || req_delayed.fire()
  dontTouch(req_delayed)

  val req = req_delayed
  val block_conflict = Wire(Bool())
  val accept = merge || (allocate && !block_conflict)
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = (0 until cfg.nReleaseEntries) map { i =>
    val entry = Module(new WritebackEntry(edge))
    val entry_id = (i + releaseIdBase).U

    entry.io.id := entry_id

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && req.valid && accept
    primary_ready(i) := entry.io.req.ready
    merge_vec(i) := entry.io.merge
    entry.io.req.bits := req.bits

    entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
    entry.io.mem_grant.bits := io.mem_grant.bits
    when (entry_id === grant_source) {
      io.mem_grant.ready := entry.io.mem_grant.ready
    }

    entry.io.release_wakeup := io.release_wakeup
    entry.io.release_update := io.release_update

    entry
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === req.bits.addr)).asUInt.orR
  val miss_req_conflict = if (DelayWritebackReq)
    (req.bits.addr === io.miss_req.bits && req.valid) ||
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  else
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (req.fire()) {
    req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", req.fire())
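
  // the hardware perf events below bin the number of valid entries
  // (block_addr.valid) into occupancy quarters each cycle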
  val perfinfo = IO(new Bundle() {
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val num_valid_entries = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", req.fire()),
    ("dcache_wbq_1/4_valid", num_valid_entries < (cfg.nReleaseEntries.U / 4.U)),
    ("dcache_wbq_2/4_valid", (num_valid_entries > (cfg.nReleaseEntries.U / 4.U)) & (num_valid_entries <= (cfg.nReleaseEntries.U / 2.U))),
    ("dcache_wbq_3/4_valid", (num_valid_entries > (cfg.nReleaseEntries.U / 2.U)) & (num_valid_entries <= (cfg.nReleaseEntries.U * 3.U / 4.U))),
    ("dcache_wbq_4/4_valid", num_valid_entries > (cfg.nReleaseEntries.U * 3.U / 4.U))
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}