/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param  = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReq))
    val merge = Output(Bool())
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)
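  // State meanings: s_invalid - entry is free; s_sleep - a delayed Release waits for
  // its refill wakeup (or a mergeable probe); s_release_req - beats are being sent on
  // channel C; s_release_resp - waiting for the ReleaseAck of a voluntary Release.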

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
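  // Beat bookkeeping sketch (assuming refillCycles = 2): a release with data sets
  // remain to "b11"; each beat fired on channel C clears the lowest set bit via
  // remain_clr, so remain steps b11 -> b10 -> b00 and busy deasserts once all
  // beats have been sent.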

  val busy = remain.orR

  val req  = Reg(new WritebackReq)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr


  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

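  // mergeData folds a main-pipe store update into the block held by this entry. The
  // mask is per-bank and widened by FillInterleaved(64, wmask), which assumes 64-bit
  // banks; e.g. wmask = "b01" replaces only bits 63:0 with new_data, keeping the rest.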
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.fire()) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for refill pipe to inform me that I can keep releasing
  val merge_probe = WireInit(false.B)
  io.merge := WireInit(false.B)
  when (state === s_sleep) {
    assert (remain === 0.U)

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

    io.merge := !io.req.bits.voluntary && io.req.bits.addr === req.addr
    merge_probe := io.req.valid && io.merge
    when (merge_probe) {
      state := s_release_req
      req.voluntary := false.B
      req.param := io.req.bits.param // respond with the probe's param, not the stale release param
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      remain_set := Mux(req.hasData || update && io.release_update.bits.mask.orR, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }
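  // Note the priority above: if a mergeable probe and the release wakeup arrive in
  // the same cycle, the merged probe response wins. Both branches clear
  // delay_release and leave s_sleep, so the entry cannot be stuck sleeping.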

  // --------------------------------------------------------------------------------
  // While there are beats remaining to be sent, we keep sending.
  // Which beat do we send in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }
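  // Illustration (assuming a 64B block and refillCycles = 2, i.e. 256-bit beats):
  // beat_data(0) = req.data(255, 0), beat_data(1) = req.data(511, 256); beat picks
  // the lowest pending beat in remain, so the block is sent lowest beat first.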

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

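  // Pass the dirty bit downstream in the user-defined echo field (huancun's
  // DirtyKey) so that L2 can distinguish a dirty writeback from a clean one.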
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }
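  // A voluntary Release must wait for a ReleaseAck on channel D before the entry
  // retires; a ProbeAck[Data] gets no response in TileLink, so we free immediately.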

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free.
  // 2. When this entry is in s_sleep, still waiting for its release_wakeup signal,
  //    and a probe req to the same addr comes in. In that case we merge the probe
  //    into the pending release and answer the probe, so no extra release is needed.
  io.req.ready := state === s_invalid ||
    state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req  = Flipped(Valid(UInt()))
    val block_miss_req  = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)


  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val merge = merge_vec.asUInt.orR
  val alloc_idx = PriorityEncoder(Mux(merge, merge_vec, primary_ready))
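  // Merging takes priority over allocation: if some entry reports a merge hit for
  // this req, route the req to that entry instead of allocating a free one.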

  // delay writeback req
  val DelayWritebackReq = true
  val req_delayed = Wire(Flipped(DecoupledIO(new WritebackReq)))
  val req_delayed_valid = RegInit(false.B)
  val req_delayed_bits = Reg(io.req.bits.cloneType)
  req_delayed.valid := req_delayed_valid
  req_delayed.bits := req_delayed_bits
  when(req_delayed.fire()){
    req_delayed_valid := false.B
  }
  // We delay writeback queue enqueue by 1 cycle, so a missQ req does not
  // depend on wbQ enqueue. As a result, a missQ req may be blocked in
  // req_delayed. When its grant comes, that req must be updated as well.
  when(
    req_delayed_valid &&
    io.release_wakeup.valid &&
    io.release_wakeup.bits === req_delayed_bits.miss_id
  ){
    // TODO: it is dirty
    req_delayed_bits.delay_release := false.B // update pipe reg
    req_delayed.bits.delay_release := false.B // update entry write req in current cycle
  }
  when(io.req.fire()){
    req_delayed_valid := true.B
    req_delayed_bits := io.req.bits
  }
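  // Together with the ready condition below, req_delayed forms a one-entry
  // pipeline register: a new req is accepted when the slot is empty or when the
  // buffered req leaves for an entry in the same cycle.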
  io.req.ready := !req_delayed_valid || req_delayed.fire()
  dontTouch(req_delayed)

  val req = req_delayed
  val block_conflict = Wire(Bool())
  val accept = merge || allocate && !block_conflict
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = (0 until cfg.nReleaseEntries) map { i =>
    val entry = Module(new WritebackEntry(edge))
    val entry_id = (i + releaseIdBase).U

    entry.io.id := entry_id

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && req.valid && accept
    primary_ready(i)   := entry.io.req.ready
    merge_vec(i) := entry.io.merge
    entry.io.req.bits  := req.bits

    entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
    entry.io.mem_grant.bits  := io.mem_grant.bits
    when (entry_id === grant_source) {
      io.mem_grant.ready := entry.io.mem_grant.ready
    }
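    // Channel D is demultiplexed by source id: only the entry whose id matches
    // grant_source sees the grant, and only that entry drives io.mem_grant.ready.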

    entry.io.release_wakeup := io.release_wakeup
    entry.io.release_update := io.release_update

    entry
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === req.bits.addr)).asUInt.orR
  val miss_req_conflict = if(DelayWritebackReq)
    req.bits.addr === io.miss_req.bits && req.valid ||
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  else
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict
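  // A miss to an address whose writeback is still pending (buffered in req_delayed
  // or held by an entry) is blocked, preventing a refill from racing with the
  // release of the same block.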

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (req.fire()) {
    req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", req.fire())

  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val perfEvents = Seq(
    ("dcache_wbq_req          ", req.fire()                                                                                                                                                              ),
    ("dcache_wbq_1/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U/4.U))                                                                                          ),
    ("dcache_wbq_2/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U/2.U))     ),
    ("dcache_wbq_3/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U*3.U/4.U)) ),
    ("dcache_wbq_4/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U*3.U/4.U))                                                                                      ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}