// xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 066ac8a465b27b54ba22458ff1a67bcd28215d73)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

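// A writeback request: either a voluntary Release of a replaced (victim)
// block, or a ProbeAck(Data) answering a probe. If delay_release is set,
// the request sleeps in its entry until the miss queue entry identified by
// miss_id wakes it up (i.e. until the corresponding refill has finished).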
class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param  = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// the main pipe may update the block's meta & data in the meantime,
// so the meta & data to be released must be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only store updates are considered here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReq))
    val merge = Output(Bool())
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)
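  // State machine:
  //   s_invalid:      entry is free and can accept a new req
  //   s_sleep:        a delayed release sleeps until release_wakeup reports its miss_id
  //   s_release_req:  beats of the Release / ProbeAck are sent on channel C
  //   s_release_resp: a voluntary Release waits for its ReleaseAck on channel D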

  // internal regs
  // remain: bitmap of beats that still need to be sent on channel C
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
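  // Set/clear-mask idiom: remain_set schedules beats when a req is accepted,
  // remain_clr retires one beat each time a message fires on channel C.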

  val busy = remain.orR

  val req  = Reg(new WritebackReq)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr


  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  // Merge store data written by the main pipe into the data to be released.
  // wmask has one bit per 64-bit bank; FillInterleaved expands it to a
  // full-width bit mask under which new_data overrides old_data.
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
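  // For example, with 8 banks of 64 bits each, wmask = "b00000011".U expands
  // to a mask over bits 127..0: banks 0 and 1 come from new_data while the
  // other banks keep old_data.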

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.fire()) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }
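  // Note: a req without data still sets one bit of remain, so exactly one
  // dataless beat (Release / ProbeAck) is sent on channel C; a req with data
  // schedules all refillCycles beats.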

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to signal (via release_wakeup) that
  // releasing may proceed; meanwhile absorb data updates from the main pipe
  // and merge an incoming probe to the same address, if any.
  val merge_probe = WireInit(false.B)
  io.merge := WireInit(false.B)
  when (state === s_sleep) {
    assert (remain === 0.U)

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

    io.merge := !io.req.bits.voluntary && io.req.bits.addr === req.addr
    merge_probe := io.req.valid && io.merge
    when (merge_probe) {
      state := s_release_req
      req.voluntary := false.B
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      remain_set := Mux(req.hasData || update && io.release_update.bits.mask.orR, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }
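  // Once a probe is merged, this entry turns into a non-voluntary release
  // (ProbeAck) that carries the sleeping release's (possibly updated) data,
  // so no separate voluntary Release has to be sent for this block.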

  // --------------------------------------------------------------------------------
  // While there are beats remaining to be sent, we keep sending.
  // Which beat to send in this cycle? PriorityEncoder picks the lowest
  // pending beat.
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2
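  // edge.Release returns a (legality, bits) pair; only the message bits are
  // kept here (hence ._2), while edge.ProbeAck directly returns the bits.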

  // Pass the dirty bit downstream through the TileLink echo field, so the
  // next-level cache (huancun) can tell whether the released data is dirty.
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  // edge.count tracks the beats of the message on channel C;
  // release_done is asserted when the last beat is accepted
  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When can this entry accept a new req?
  // 1. When this entry is free
  // 2. When this entry is sleeping, waiting for release_wakeup, and a probe
  //    req to the same address arrives. The probe is then merged with the
  //    pending release and handled here, so no separate release is needed.
  io.req.ready := state === s_invalid ||
    state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req  = Flipped(Valid(UInt()))
    val block_miss_req  = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)


  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val merge = merge_vec.asUInt.orR
  val alloc_idx = PriorityEncoder(Mux(merge, merge_vec, primary_ready))
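  // An entry that can merge the incoming req (a probe hitting a sleeping
  // release) takes priority over plain allocation of a free entry.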

  // delay writeback req enq by one cycle
  val DelayWritebackReq = true
  val req_delayed = Wire(Flipped(DecoupledIO(new WritebackReq)))
  val req_delayed_valid = RegInit(false.B)
  val req_delayed_bits = Reg(io.req.bits.cloneType)
  req_delayed.valid := req_delayed_valid
  req_delayed.bits := req_delayed_bits
  when(req_delayed.fire()){
    req_delayed_valid := false.B
  }
  // Writeback queue enq is delayed by 1 cycle so that a miss queue req does
  // not depend on wbQ enq. As a result, a miss queue req may be blocked in
  // req_delayed; when its grant comes, that req must be updated as well.
  when(
    req_delayed_valid &&
    io.release_wakeup.valid &&
    io.release_wakeup.bits === req_delayed_bits.miss_id
  ){
    // TODO: it is dirty
    req_delayed_bits.delay_release := false.B // update pipe reg
    req_delayed.bits.delay_release := false.B // update entry write req in current cycle
  }
  when(io.req.fire()){
    req_delayed_valid := true.B
    req_delayed_bits := io.req.bits
  }
  io.req.ready := !req_delayed_valid || req_delayed.fire()
  dontTouch(req_delayed)
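  // req_delayed acts as a single-entry pipeline buffer: a new req is accepted
  // when the buffer is empty, or when the buffered req leaves in the same cycle.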

  val req = req_delayed
  val block_conflict = Wire(Bool())
  val accept = merge || allocate && !block_conflict
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = (0 until cfg.nReleaseEntries) map { i =>
    val entry = Module(new WritebackEntry(edge))
    val entry_id = (i + releaseIdBase).U

    entry.io.id := entry_id

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && req.valid && accept
    primary_ready(i)   := entry.io.req.ready
    merge_vec(i) := entry.io.merge
    entry.io.req.bits  := req.bits

    // route the channel D response (ReleaseAck) to the entry whose id
    // matches the grant's source
    entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
    entry.io.mem_grant.bits  := io.mem_grant.bits
    when (entry_id === grant_source) {
      io.mem_grant.ready := entry.io.mem_grant.ready
    }

    entry.io.release_wakeup := io.release_wakeup
    entry.io.release_update := io.release_update

    entry
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === req.bits.addr)).asUInt.orR
  val entry_miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  val miss_req_conflict = if (DelayWritebackReq)
    req.valid && req.bits.addr === io.miss_req.bits || entry_miss_req_conflict
  else
    entry_miss_req_conflict
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  // round-robin arbitration among entries for the single channel C port
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (req.fire()) {
    req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", req.fire())

  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val validCount = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_wbq_req          ", req.fire()                                                                               ),
    ("dcache_wbq_1/4_valid    ", validCount < (cfg.nReleaseEntries.U / 4.U)                                               ),
    ("dcache_wbq_2/4_valid    ", validCount > (cfg.nReleaseEntries.U / 4.U) && validCount <= (cfg.nReleaseEntries.U / 2.U)),
    ("dcache_wbq_3/4_valid    ", validCount > (cfg.nReleaseEntries.U / 2.U) && validCount <= (cfg.nReleaseEntries.U * 3.U / 4.U)),
    ("dcache_wbq_4/4_valid    ", validCount > (cfg.nReleaseEntries.U * 3.U / 4.U)                                         ),
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}