/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

/** Request handed to the writeback queue: one cache block to be sent out on TileLink channel C.
  *
  * How each field is consumed by `WritebackEntry` in this file:
  *  - `addr`          : used as the `toAddress` of the outgoing message and for the
  *                      address comparisons (merge / conflict checks).
  *  - `param`         : passed as `shrinkPermissions` (Release) or `reportPermissions` (ProbeAck).
  *  - `voluntary`     : true selects a Release, false selects a ProbeAck.
  *  - `hasData`       : true selects the data-carrying message and schedules every beat of the
  *                      block; false schedules a single beat.
  *  - `dirty`         : drives the `DirtyKey` echo field of the data-carrying Release, when the
  *                      edge provides that field.
  *  - `data`          : the whole block, `cfg.blockBytes * 8` bits wide.
  *  - `delay_release` : when set, the entry parks in its sleep state instead of releasing at once.
  *  - `miss_id`       : compared against `release_wakeup.bits` to wake a sleeping entry.
  */
class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  /** Debug print of this request via `XSDebug`.
    *
    * Only `addr`, `param`, `voluntary`, `hasData` and `data` are printed;
    * `dirty`, `delay_release` and `miss_id` are not part of the message.
    */
  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
/** Store update forwarded to sleeping writeback entries.
  *
  *  - `addr` : block address, compared against the address held by each sleeping entry.
  *  - `mask` : one bit per bank (`DCacheBanks` bits); a set bit selects the matching
  *             64-bit slice of `data` (see `WritebackEntry.mergeData`).
  *  - `data` : full block of new data, `cfg.blockBytes * 8` bits wide.
  */
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

/** One slot of the writeback queue. It holds a single [[WritebackReq]] and drives it out on
  * channel C, either as a voluntary Release (followed by a wait on channel D) or as a ProbeAck.
  *
  * State machine:
  *  - `s_invalid`      : free, ready to accept a request.
  *  - `s_sleep`        : request accepted with `delay_release` set. The entry waits for either a
  *                       matching `release_wakeup`, or a non-voluntary request to the same
  *                       address, which is merged into the held request. While asleep, matching
  *                       `release_update`s are folded into the held data.
  *  - `s_release_req`  : beats are being sent on `mem_release`.
  *  - `s_release_resp` : voluntary Release sent; waiting for the response on `mem_grant`.
  *
  * NOTE: several registers are assigned in more than one `when` block and the design relies on
  * Chisel's last-connect rule, so the order of the blocks below is significant.
  *
  * @param edge TileLink edge used to build the channel C messages and to count beats
  */
class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    // TileLink source id used by this entry for everything it sends
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReq))
    // high while asleep and the offered request is a non-voluntary one to the held address
    val merge = Output(Bool())
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    // address held by this entry; valid whenever the entry is not free
    val block_addr = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats: one bit per beat still to be sent
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr

  val busy = remain.orR

  val req = Reg(new WritebackReq)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr


  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  /** Overlay `new_data` on `old_data`: every set bit of `wmask` selects one 64-bit slice
    * of `new_data`, every clear bit keeps the corresponding slice of `old_data`.
    */
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  // (this also fires when a probe is merged in s_sleep; the s_sleep block below then
  //  overrides some of these assignments)
  when (io.req.fire()) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      // all beats when there is data, a single beat otherwise
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for refill pipe to inform me that I can keep releasing
  val merge_probe = WireInit(false.B)
  io.merge := false.B
  when (state === s_sleep) {
    assert (remain === 0.U)

    // a store to the held block arrived: fold it into the held data
    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

    io.merge := !io.req.bits.voluntary && io.req.bits.addr === req.addr
    merge_probe := io.req.valid && io.merge
    when (merge_probe) {
      // The incoming probe replaces the pending release: answer it as a ProbeAck.
      // These assignments come after `req := io.req.bits` above and therefore win
      // for the fields listed here.
      state := s_release_req
      req.voluntary := false.B
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      // an update arriving in the wakeup cycle is not yet visible in req.hasData
      remain_set := Mux(req.hasData || update && io.release_update.bits.mask.orR, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain)

  // the held block split into beats, lowest bits first
  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  // forward the dirty bit in the echo field, if the edge carries one
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    // a dirty block must always be released with data
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  // retire the beat that was just accepted
  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    // only voluntary releases wait for a response on channel D
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When does this entry merge a new req?
  // 1. When this entry is free
  // 2. When this entry wants to release while still waiting for release_wakeup signal,
  //    and a probe req with the same addr comes. In this case we merge probe with release,
  //    handle this probe, so we don't need another release.
  io.req.ready := state === s_invalid ||
    state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

/** Writeback queue: `cfg.nReleaseEntries` [[WritebackEntry]] slots behind a single request port.
  *
  * An incoming request is steered to a sleeping entry that can merge it, if there is one, and
  * otherwise to the first free entry, unless some entry already holds the same address. The
  * channel C outputs of all entries are arbitrated round-robin, and channel D responses are
  * routed back to the entry whose source id they carry. `block_miss_req` reports whether the
  * address on `miss_req` is currently held by any entry.
  *
  * @param edge TileLink edge shared by all entries
  */
class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req  = Flipped(Valid(UInt()))
    val block_miss_req  = Output(Bool())
  })

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val merge = merge_vec.asUInt.orR
  // a mergeable entry takes priority over a free one
  val alloc_idx = PriorityEncoder(Mux(merge, merge_vec, primary_ready))

  val req = io.req
  val block_conflict = Wire(Bool())
  // a fresh allocation is refused while another entry holds the same address
  val accept = merge || allocate && !block_conflict
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // NOTE(review): elaboration-time check kept from the original code; the grant routing
  // below compares full source ids and does not rely on it.
  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = (0 until cfg.nReleaseEntries) map { i =>
    val entry = Module(new WritebackEntry(edge))

    // source id this entry uses on channel C; its channel D response carries the same id
    val entry_id = (i + releaseIdBase).U
    entry.io.id := entry_id

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && req.valid && accept
    primary_ready(i)   := entry.io.req.ready
    merge_vec(i) := entry.io.merge
    entry.io.req.bits  := req.bits

    // Route the response by comparing the full source id with the entry's own id.
    // Matching only the low bits of the source against `i` is correct only when
    // releaseIdBase is aligned to a power of two that is >= nReleaseEntries.
    val grant_hit = entry_id === grant_source
    entry.io.mem_grant.valid := grant_hit && io.mem_grant.valid
    entry.io.mem_grant.bits  := io.mem_grant.bits
    when (grant_hit) {
      io.mem_grant.ready := entry.io.mem_grant.ready
    }

    entry.io.release_wakeup := io.release_wakeup
    entry.io.release_update := io.release_update

    entry
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(5))
  })

  // Occupancy buckets. Every occupancy value falls in exactly one bucket:
  //   [0, n/4], (n/4, n/2], (n/2, 3n/4], (3n/4, n]
  val valid_count = PopCount(entries.map(e => e.io.block_addr.valid))
  val quarter_entries = (cfg.nReleaseEntries / 4).U
  val half_entries = (cfg.nReleaseEntries / 2).U
  val three_quarter_entries = (cfg.nReleaseEntries * 3 / 4).U
  val perfEvents = Seq(
    ("dcache_wbq_req ", io.req.fire() ),
    ("dcache_wbq_1/4_valid ", (valid_count <= quarter_entries) ),
    ("dcache_wbq_2/4_valid ", (valid_count > quarter_entries) & (valid_count <= half_entries) ),
    ("dcache_wbq_3/4_valid ", (valid_count > half_entries) & (valid_count <= three_quarter_entries) ),
    ("dcache_wbq_4/4_valid ", (valid_count > three_quarter_entries) )
  )

  // each event is registered once before it drives its counter increment
  perfinfo.perfEvents.perf_events.zip(perfEvents).foreach { case (perf_out, (_, perf)) =>
    perf_out.incr_step := RegNext(perf)
  }
}