// /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 67ba96b4871c459c09df20e3052738174021a830)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import huancun.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

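// Control-only fields of a writeback request, shared by Release and ProbeAck:
// param carries the TileLink report/shrink permissions, voluntary distinguishes
// a voluntary Release from a ProbeAck, hasData/dirty describe the payload, and
// delay_release/miss_id let a Release sleep until the miss queue entry given by
// miss_id wakes it up.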
class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param  = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}

class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}

class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  def toWritebackReqWodata(): WritebackReqWodata = {
    val out = Wire(new WritebackReqWodata)
    out.addr := addr
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val out = Wire(new WritebackReqCtrl)
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqData(): WritebackReqData = {
    val out = Wire(new WritebackReqData)
    out.data := data
    out
  }
}

// While a Release sleeps waiting for a refill to wake it up,
// the main pipe may update this line's meta & data in the meantime,
// so the meta & data to be released must be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire()
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
  val mask_orr = Bool()
}

// When a probe TtoB req enters the dcache main pipe, check whether that cacheline
// is waiting to be released. If so, change TtoB to TtoN and set the dcache
// coherence state to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}

class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new WBQEntryReleaseUpdate))

    val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid ->            s_release_req
  // ProbeAck merge Release: s_invalid ->            s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                        (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                        (send a ProbeAck after Release transaction is over)
  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid)))

  // internal regs
  // remaining beats
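  // remain is a bitmask with one bit per data beat still to be sent on channel C;
  // remain_set / remain_clr request single-cycle set / clear of those bits.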
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  val s_data_merge = RegInit(true.B)

  // there is a valid request that can be sent to the release bus
  val busy = remain.orR && s_data_override && s_data_merge // has remaining beats and all data writes have finished

  val req  = Reg(new WritebackReqCtrl)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := paddr_dup_0

  s_data_override := true.B // data_override takes only 1 cycle
  s_data_merge := true.B // data_merge takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

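  // Merge updated store data into the buffered block: each bit of wmask covers one
  // 64-bit DCache bank and is expanded to a full bitmask with FillInterleaved,
  // e.g. with wmask = 0b0010 only bits [127:64] are taken from new_data.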
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    s_data_override := false.B
    // only update paddr when allocating a new writeback queue entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr
    when (io.req.bits.delay_release) {
      state := s_sleep
      state_dup_0 := s_sleep
      state_dup_1 := s_sleep
      state_dup_for_mp.foreach(_ := s_sleep)
    }.otherwise {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
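      // a req with data occupies all refillCycles beats; a dataless one still takes a single beat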
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for refill pipe to inform me that I can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === paddr_dup_0
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask_orr
      req.dirty := req.dirty || io.release_update.bits.mask_orr
      s_data_merge := false.B
    }.elsewhen (merge) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      req.voluntary := false.B
      req.param := req.param // param stays unchanged when a ProbeAck is merged
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      s_data_override := !io.req.bits.hasData // update data when io.req.bits.hasData
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask_orr || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat do we send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

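  // tell the downstream cache (HuanCun) whether the released data is dirty
  // via the DirtyKey echo field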
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain_dup_1) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }

  // Because the wbq now merges a same-addr req unconditionally, a req that arrives
  // too late may be unable to merge into the in-flight one. In that case we record
  // the new req and handle it after the previous one finishes.
  // TODO: initialize these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReqCtrl = {
      val r = Wire(new WritebackReqCtrl())
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())

  when (state_dup_0 === s_release_req) {
    when (io.mem_release.fire()) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is Release
      when (release_done) {
        state := s_release_resp
        state_dup_0 := s_release_resp
        state_dup_1 := s_release_resp
        state_dup_for_mp.foreach(_ := s_release_resp)
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire() || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change Release to ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          // s_data_override := false.B
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReqCtrl)
//          release_later := false.B
          state := s_sleep
          state_dup_0 := s_sleep
          state_dup_1 := s_sleep
          state_dup_for_mp.foreach(_ := s_sleep)
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_0 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_1 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_for_mp.foreach(_ := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          ))
          req := req_later.toWritebackReqCtrl
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          state_dup_0 := s_invalid
          state_dup_1 := s_invalid
          state_dup_for_mp.foreach(_ := s_invalid)
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state_dup_0 === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire()) {
      when (merge) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        state_dup_for_mp.foreach(_ := s_release_req)
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen(release_later) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        state_dup_for_mp.foreach(_ := s_release_req)
        req := req_later.toWritebackReqCtrl
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        state_dup_0 := s_invalid
        state_dup_1 := s_invalid
        state_dup_for_mp.foreach(_ := s_invalid)
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free
  // 2. When this entry wants to release but is still waiting for the release_wakeup
  //    signal, and a probe req with the same addr comes in. In that case we merge the
  //    probe with the release; handling this probe means we don't need another release.
  io.primary_ready := state_dup_1 === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
  io.secondary_ready := state_dup_1 =/= s_invalid && io.req.bits.addr === paddr_dup_0

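  // a TtoB probe that matches a sleeping Release must be answered toN:
  // this line is already scheduled to be released, so it cannot stay in B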
  io.probe_ttob_check_resp.valid := RegNext(io.probe_ttob_check_req.valid) // for debug only
  io.probe_ttob_check_resp.bits.toN := state_dup_1 === s_sleep &&
    RegNext(io.probe_ttob_check_req.bits.addr) === paddr_dup_0 &&
    RegNext(io.probe_ttob_check_req.valid)

  // data update logic
  when (!s_data_merge) {
    data := mergeData(data, io.release_update.bits.data_delayed, io.release_update.bits.mask_delayed)
  }

  when (!s_data_override && req.hasData) {
    data := io.req_data.data
  }

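  // a data merge and a data override must never be in progress in the same cycle,
  // since both would write 'data' simultaneously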
  assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state_dup_0 === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("wb_probe_ttob_fix", io.probe_ttob_check_resp.valid && io.probe_ttob_check_resp.bits.toN)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state_dup_1 === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there are free entries, merge into an existing entry or allocate a new one.
  // When there is no free entry, reject the req even if it could be merged.
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // delay the data write in a miss queue release update for 1 cycle
  val release_update_bits_for_entry = Wire(new WBQEntryReleaseUpdate)
  release_update_bits_for_entry.addr := io.release_update.bits.addr
  release_update_bits_for_entry.mask_delayed := RegEnable(io.release_update.bits.mask, io.release_update.valid)
  release_update_bits_for_entry.data_delayed := RegEnable(io.release_update.bits.data, io.release_update.valid)
  release_update_bits_for_entry.mask_orr := io.release_update.bits.mask.orR

  // delay data write in writeback req for 1 cycle
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U
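      // wbq entries occupy TileLink source ids [releaseIdBase, releaseIdBase + nReleaseEntries)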

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i)   := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits  := io.req.bits
      entry.io.req_data  := req_data

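      // allocate into the lowest-indexed free entry only: former_primary_ready
      // masks off every entry that has a free entry ahead of it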
      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update.valid := io.release_update.valid
      entry.io.release_update.bits := release_update_bits_for_entry // data write delayed

      entry.io.probe_ttob_check_req := io.probe_ttob_check_req
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR
  }

  io.probe_ttob_check_resp.valid := RegNext(io.probe_ttob_check_req.valid) // for debug only
  io.probe_ttob_check_resp.bits.toN := VecInit(entries.map(e => e.io.probe_ttob_check_resp.bits.toN)).asUInt.orR

  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

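  // round-robin arbitration over all entries' channel C requests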
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire()),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}