// xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision dcbc69cb2a7ea07707ede3d8f7c74421ef450202)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param  = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

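// A minimal usage sketch (hypothetical signal names, not taken from this file):
// a voluntary writeback of a dirty block that must wait for its refill first.
//   val wb = Wire(new WritebackReq)
//   wb.addr          := replacedBlockAddr  // hypothetical: physical block address
//   wb.param         := TLPermissions.TtoN // give up the block entirely
//   wb.voluntary     := true.B             // a Release, not a ProbeAck
//   wb.hasData       := true.B
//   wb.dirty         := true.B
//   wb.data          := replacedBlockData  // hypothetical: full cache block
//   wb.delay_release := true.B             // sleep until release_wakeup fires
//   wb.miss_id       := allocatedMissId    // hypothetical: miss queue entry to wait for
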
// While a Release sleeps, waiting for a refill to wake it up, the main pipe
// may update the block's meta & data in the meantime, so the meta & data to be
// released must be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W) // one bit per 8-byte bank
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for a new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)
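  // Entry life cycle:
  //   s_invalid      -> free; a new req may be allocated into this entry
  //   s_sleep        -> req arrived with delay_release set; wait for release_wakeup
  //                     from the miss queue (or merge an incoming probe to this block)
  //   s_release_req  -> send Release / ProbeAck beats on TileLink channel C
  //   s_release_resp -> for voluntary Releases only: wait for ReleaseAck on channel D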

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
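  // remain is a per-beat bitmap: remain_set marks every beat of a newly started
  // message, and remain_clr clears one bit per fired channel-C beat, so the entry
  // stays busy until all beats have been sent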

  val busy = remain.orR

  val req  = Reg(new WritebackReq)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
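  // each bit of wmask selects one 64-bit (8-byte) bank: e.g. for wmask = "b0010".U,
  // only data bits 127:64 are taken from new_data, while all other banks keep old_data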

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }
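  // note: a message without data still occupies a single beat slot (remain_set = 1.U),
  // so that busy asserts and exactly one header-only channel-C message is sent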

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform me that I can keep releasing
  when (state === s_sleep) {
    assert (remain === 0.U)

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

    when (io.req.valid && io.secondary_ready) {
      state := s_release_req
      req.voluntary := false.B // answer the merged probe instead of sending a voluntary Release
      req.param := req.param // param is deliberately left unchanged
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      // req.hasData is not updated by this cycle's release_update yet, so check update as well
      remain_set := Mux(req.hasData || (update && io.release_update.bits.mask.orR), ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat should be sent in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty) // pass the dirty hint to L2 (huancun) via the echo field
  when(busy) {
    assert(!req.dirty || req.hasData) // a dirty block must carry data
  }

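  // four channel-C message flavors: {ProbeAck, ProbeAckData, Release, ReleaseData},
  // selected by (req.voluntary, req.hasData)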
  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) } // clear the beat sent in this cycle

  val (_, _, release_done, _) = edge.count(io.mem_release) // release_done: last beat of the message has fired

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }
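  // a ProbeAck is not acknowledged on channel D, so the entry goes straight back to
  // s_invalid; a voluntary Release must first wait for its ReleaseAck in s_release_resp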

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When can this entry accept a req?
  // 1. When this entry is free: a new req is allocated into it (primary_ready).
  // 2. When this entry wants to release but is still waiting for the release_wakeup
  //    signal, and a probe req to the same addr comes in: the probe is merged into
  //    the pending release (secondary_ready) and answered with the release's data,
  //    so a separate release is no longer needed afterwards.
  io.primary_ready := state === s_invalid
  io.secondary_ready := state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req  = Flipped(Valid(UInt()))
    val block_miss_req  = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  // delay the writeback req for 1 cycle in a one-entry pipeline register
  val DelayWritebackReq = true
  val req_delayed = Wire(Flipped(DecoupledIO(new WritebackReq)))
  val req_delayed_valid = RegInit(false.B)
  val req_delayed_bits = Reg(io.req.bits.cloneType)
  req_delayed.valid := req_delayed_valid
  req_delayed.bits := req_delayed_bits
  when(req_delayed.fire()){
    req_delayed_valid := false.B
  }
  // Writeback queue enq is delayed for 1 cycle, so a miss queue req does not
  // depend on wbQ enqueue. As a result, a writeback req may still be blocked in
  // req_delayed when its grant (release_wakeup) arrives; that req must be updated too.
  when(
    req_delayed_valid &&
    io.release_wakeup.valid &&
    io.release_wakeup.bits === req_delayed_bits.miss_id
  ){
    // TODO: this is hacky
    req_delayed_bits.delay_release := false.B // update the pipe reg
    req_delayed.bits.delay_release := false.B // update the entry write req in the current cycle
  }
  when(io.req.fire()){
    req_delayed_valid := true.B
    req_delayed_bits := io.req.bits
  }
  io.req.ready := !req_delayed_valid || req_delayed.fire() // accept when the buffer is empty or draining
  dontTouch(req_delayed)

  // allocate a free entry for the incoming request
  val block_conflict = Wire(Bool())
  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge = Cat(secondary_ready_vec).orR
  val alloc = !merge && Cat(primary_ready_vec).orR && !block_conflict
  // For now, we block a new release until the previous release of the same block has finished
  // TODO: Is it possible to merge these release reqs?

  val req = req_delayed
  val accept = merge || alloc
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source // routes the ReleaseAck back to its entry
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      // primary_valid goes to the lowest-indexed free entry only
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := req.valid
      primary_ready_vec(i)   := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits  := req.bits

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
      when (entry_id === grant_source) {
        io.mem_grant.ready := entry.io.mem_grant.ready
      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === req.bits.addr)).asUInt.orR
  val miss_req_conflict = if(DelayWritebackReq)
    req.bits.addr === io.miss_req.bits && req.valid ||
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  else
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict
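  // a miss req that matches an inflight writeback of the same block (including one
  // still pending in req_delayed) is blocked until that writeback leaves the queue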

  // round-robin arbitration over all entries' channel-C requests
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (req.fire()) {
    req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", req.fire())

  val perfinfo = IO(new Bundle {
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  val wbq_valid_count = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_wbq_req          ", req.fire()),
    ("dcache_wbq_1/4_valid    ", wbq_valid_count < (cfg.nReleaseEntries.U/4.U)),
    ("dcache_wbq_2/4_valid    ", (wbq_valid_count > (cfg.nReleaseEntries.U/4.U)) & (wbq_valid_count <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3/4_valid    ", (wbq_valid_count > (cfg.nReleaseEntries.U/2.U)) & (wbq_valid_count <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4/4_valid    ", wbq_valid_count > (cfg.nReleaseEntries.U*3.U/4.U))
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}