xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 7f37d55fc418d3b4583220e634da7e459802c6d8)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import org.chipsalliance.cde.config.Parameters
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param  = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}

class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}

class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  def toWritebackReqWodata(): WritebackReqWodata = {
    val out = Wire(new WritebackReqWodata)
    out.addr := addr
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val out = Wire(new WritebackReqCtrl)
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqData(): WritebackReqData = {
    val out = Wire(new WritebackReqData)
    out.data := data
    out
  }
}

// While a Release sleeps, waiting for a refill to wake it up, the main pipe
// might update the meta & data of the line in the meantime, so the meta & data
// to be released must be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only stores are considered here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only stores are considered here
  val addr = UInt(PAddrBits.W)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
  val mask_orr = Bool()
}
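// Note: mask_orr is derived from the un-delayed mask, so a sleeping entry can
// update its hasData/dirty flags one cycle before the wide mask/data arrive.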

// When a probe TtoB req enters the dcache main pipe, check whether that
// cacheline is waiting for release. If it is, change TtoB to TtoN and set the
// dcache coherence state to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}

class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new WBQEntryReleaseUpdate))

    val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid ->            s_release_req
  // ProbeAck merge Release: s_invalid ->            s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                        (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                        (send a ProbeAck after Release transaction is over)
  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid)))

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr
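  // `remain` is a per-beat bitmask of channel-C beats still to be sent:
  // remain_set marks beats when a release starts (all beats for a message with
  // data, a single beat otherwise) and remain_clr drops the beat just fired,
  // so remain.orR means "beats outstanding".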

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  val s_data_merge = RegInit(true.B)

  // there is a valid request that can be sent on the release bus
  val busy = remain.orR && s_data_override && s_data_merge // beats remain and the data write has finished

  val req  = Reg(new WritebackReqCtrl)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := paddr_dup_0

  s_data_override := true.B // data_override takes only 1 cycle
  s_data_merge := true.B // data_merge takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
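  // Each wmask bit covers one 64-bit bank of the block. For example, with
  // wmask = "b01".U only bits (63, 0) are taken from new_data; everything
  // else is kept from old_data.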

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    s_data_override := false.B
    // paddr is only updated when a new writeback queue entry is allocated
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr
    when (io.req.bits.delay_release) {
      state := s_sleep
      state_dup_0 := s_sleep
      state_dup_1 := s_sleep
      state_dup_for_mp.foreach(_ := s_sleep)
    }.otherwise {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }
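  // Allocation summary: a req with delay_release set sleeps until
  // release_wakeup reports its refill done; otherwise it starts releasing at
  // once. For sleeping reqs, remain_set is deferred to wakeup time.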

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to signal that releasing may continue
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === paddr_dup_0
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask_orr
      req.dirty := req.dirty || io.release_update.bits.mask_orr
      s_data_merge := false.B
    }.elsewhen (merge) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      req.voluntary := false.B
      req.param := req.param // param is kept unchanged
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      s_data_override := !io.req.bits.hasData // update data when io.req.bits.hasData
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask_orr || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }
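  // The block is sliced into refillCycles beats of beatBits each;
  // PriorityEncoder always picks the lowest-numbered pending beat, so beats
  // go out in address order.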

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2
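  // edge.Release returns a (legal, TLBundleC) pair, so ._2 extracts the
  // channel-C bits; edge.ProbeAck returns the bundle directly.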

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire) { remain_clr := PriorityEncoderOH(remain_dup_1) }
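  // PriorityEncoderOH yields a one-hot mask of the beat just sent, so exactly
  // that bit is cleared from `remain`.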

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }

  // Because the wbq now merges same-addr reqs unconditionally, a req that
  // arrives too late cannot be merged into the in-flight one. In that case the
  // new req is recorded and handled after the previous one finishes.
  // TODO: initialize these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReqCtrl = {
      val r = Wire(new WritebackReqCtrl())
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())
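  // req_later buffers the control fields of a req that arrived too late to
  // merge; it is replayed once the current transaction completes.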

  when (state_dup_0 === s_release_req) {
    when (io.mem_release.fire) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is a Release
      when (release_done) {
        state := s_release_resp
        state_dup_0 := s_release_resp
        state_dup_1 := s_release_resp
        state_dup_for_mp.foreach(_ := s_release_resp)
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change Release to ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          // s_data_override := false.B
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is a ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after the ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReqCtrl)
//          release_later := false.B
          state := s_sleep
          state_dup_0 := s_sleep
          state_dup_1 := s_sleep
          state_dup_for_mp.foreach(_ := s_sleep)
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_0 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_1 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_for_mp.foreach(_ := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          ))
          req := req_later.toWritebackReqCtrl
          when(io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release) {
            remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
            remain_clr := 0.U
          }.otherwise {
            remain_set := 0.U
          }
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          state_dup_0 := s_invalid
          state_dup_1 := s_invalid
          state_dup_for_mp.foreach(_ := s_invalid)
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state_dup_0 === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire) {
      when (merge) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        state_dup_for_mp.foreach(_ := s_release_req)
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen(release_later) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        state_dup_for_mp.foreach(_ := s_release_req)
        req := req_later.toWritebackReqCtrl
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        state_dup_0 := s_invalid
        state_dup_1 := s_invalid
        state_dup_for_mp.foreach(_ := s_invalid)
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free, it can be allocated to a new req (primary).
  // 2. When this entry wants to release but is still waiting for the release_wakeup
  //    signal, and a probe req with the same addr comes, the probe is merged with
  //    the release (secondary). Handling the probe here means no extra release is needed.
  io.primary_ready := state_dup_1 === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
  io.secondary_ready := state_dup_1 =/= s_invalid && io.req.bits.addr === paddr_dup_0

  io.probe_ttob_check_resp.valid := RegNext(io.probe_ttob_check_req.valid) // for debug only
  io.probe_ttob_check_resp.bits.toN := RegNext(
    state_dup_1 === s_sleep &&
    io.probe_ttob_check_req.bits.addr === paddr_dup_0 &&
    io.probe_ttob_check_req.valid
  )
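  // The response is registered, so a check issued from mainpipe s1 is answered
  // one cycle later. toN is raised only when a sleeping Release holds the same
  // addr, i.e. the probed line is already queued for release.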

  // data update logic
  when (!s_data_merge) {
    data := mergeData(data, io.release_update.bits.data_delayed, io.release_update.bits.mask_delayed)
  }

  when (!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }

  assert(!RegNext(!s_data_merge && !s_data_override))
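  // A data merge and a data override must never be pending in the same cycle,
  // otherwise the two writes to `data` would conflict.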

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state_dup_0 === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("wb_probe_ttob_fix", io.probe_ttob_check_resp.valid && io.probe_ttob_check_resp.bits.toN)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state_dup_1 === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there are empty entries, merge or allocate a new entry.
  // When there is no empty entry, reject the req even if it could be merged.
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // delay the data write of the miss queue release update by 1 cycle
  val release_update_bits_for_entry = Wire(new WBQEntryReleaseUpdate)
  release_update_bits_for_entry.addr := io.release_update.bits.addr
  release_update_bits_for_entry.mask_delayed := RegEnable(io.release_update.bits.mask, io.release_update.valid)
  release_update_bits_for_entry.data_delayed := RegEnable(io.release_update.bits.data, io.release_update.valid)
  release_update_bits_for_entry.mask_orr := io.release_update.bits.mask.orR

  // delay the data write of a writeback req by 1 cycle
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)
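  // The wide data is registered once here and broadcast to all entries a cycle
  // later; this matches the entry side, which performs its data write in the
  // cycle after allocation (while !s_data_override).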

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U
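      // former_primary_ready is true when any lower-index entry is also free,
      // so a new req is always allocated to the lowest-index free entry.
      // Source IDs start at releaseIdBase so channel-D grants can be routed
      // back to the matching wbq entry by source.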

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i)   := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits  := io.req.bits
      entry.io.req_data  := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update.valid := io.release_update.valid
      entry.io.release_update.bits := release_update_bits_for_entry // data write delayed

      entry.io.probe_ttob_check_req := io.probe_ttob_check_req
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR
  }

  io.probe_ttob_check_resp.valid := RegNext(io.probe_ttob_check_req.valid) // for debug only
  io.probe_ttob_check_resp.bits.toN := VecInit(entries.map(e => e.io.probe_ttob_check_resp.bits.toN)).asUInt.orR
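  // Same-addr reqs are merged into a single entry, so at most one entry can
  // match the probed addr and OR-reducing the per-entry toN bits is safe.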

  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)
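  // round-robin arbitration of all entries' channel-C requests onto mem_release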

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}