xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 042e89e414f8956a139ecd64336469a6a7b5ff6f)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

17package xiangshan.cache
18
19import chisel3._
20import chisel3.util._
21import freechips.rocketchip.tilelink.TLPermissions._
22import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
23import org.chipsalliance.cde.config.Parameters
24import utils.HasTLDump
25import utility.{XSDebug, XSPerfAccumulate, HasPerfEvents}
26
27
// Control-only fields of a writeback request (no address, no data payload).
class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param  = UInt(cWidth.W) // TL-C permission field (report/shrink permissions of the message)
  val voluntary = Bool()      // true: voluntary Release; false: ProbeAck in response to a Probe
  val hasData = Bool()        // message carries a full-line data payload
  val corrupt = Bool()        // forwarded to the TileLink `corrupt` bit of the C-channel message
  val dirty = Bool()          // line is dirty; downstream asserts dirty implies hasData

  val delay_release = Bool()  // NOTE(review): not read in this file — presumably defers the
                              // release until a refill wakes it; confirm at producer/consumer
  val miss_id = UInt(log2Up(cfg.nMissEntries).W) // associated miss-queue entry id (unused here)
}
38
// Writeback request without the data payload: control fields plus the physical address.
class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W) // physical address of the cacheline being written back

  // Debug print of the main request fields (no data).
  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}
47
// Data-only part of a writeback request: one full cacheline.
class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}
51
// Full writeback request: control fields, physical address, and the cacheline data.
// Provides projections onto the three smaller request bundles.
class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  // Debug print including the data payload.
  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  // Project onto the data-less request: control fields + address, data dropped.
  def toWritebackReqWodata(): WritebackReqWodata = {
    val wodata = Wire(new WritebackReqWodata)
    wodata.miss_id       := miss_id
    wodata.delay_release := delay_release
    wodata.dirty         := dirty
    wodata.corrupt       := corrupt
    wodata.hasData       := hasData
    wodata.voluntary     := voluntary
    wodata.param         := param
    wodata.addr          := addr
    wodata
  }

  // Project onto the control-only request: no address, no data.
  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val ctrl = Wire(new WritebackReqCtrl)
    ctrl.miss_id       := miss_id
    ctrl.delay_release := delay_release
    ctrl.dirty         := dirty
    ctrl.corrupt       := corrupt
    ctrl.hasData       := hasData
    ctrl.voluntary     := voluntary
    ctrl.param         := param
    ctrl
  }

  // Project onto the data-only request.
  def toWritebackReqData(): WritebackReqData = {
    val payload = Wire(new WritebackReqData)
    payload.data := data
    payload
  }
}
91
// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)            // physical address of the line being updated
  val mask = UInt(DCacheBanks.W)          // per-bank mask of the banks touched by the store
  val data = UInt((cfg.blockBytes * 8).W) // full-line data carrying the update
}
101
// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)                    // physical address (not delayed)
  val mask_delayed = UInt(DCacheBanks.W)          // write mask, delayed by one cycle
  val data_delayed = UInt((cfg.blockBytes * 8).W) // line data, delayed by one cycle
  val mask_orr = Bool()                           // presumably mask.orR precomputed by the
                                                  // producer ("any bank updated") — confirm
}
111
// When a probe TtoB req enter dcache main pipe, check if that cacheline
// is waiting for release. If it is so, change TtoB to TtoN, set dcache
// coh to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}
118
// Response to ProbeToBCheckReq.
class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}
122
// One writeback-queue entry: turns an accepted WritebackReq into a TileLink
// ProbeAck[Data] or Release[Data] on channel C and, for voluntary Releases,
// waits for the matching ReleaseAck on channel D before freeing itself.
class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    // TileLink source id this entry stamps on its C-channel messages
    val id = Input(UInt())

    // allocation request (control + addr); the line data arrives separately
    // through req_data and is captured one cycle after allocation
    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))        // outgoing Release / ProbeAck
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle))) // incoming ReleaseAck
    val primary_valid = Input(Bool())   // allocation arbiter selected this entry
    val primary_ready = Output(Bool())  // entry is idle and can accept a request
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool())) // duplicated copies to cut fanout

    // while the entry is busy, exposes its block address for conflict checks
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_release_req :: s_release_resp ::Nil = Enum(3)
  // ProbeAck:  s_invalid -> s_release_req -> s_invalid
  // Release:   s_invalid -> s_release_req -> s_release_resp -> s_invalid
  // NOTE(review): earlier revisions of this comment described an s_sleep state
  // and Release/ProbeAck merging; no such state exists in this enum anymore.

  val state = RegInit(s_invalid)
  // NOTE(review): state_dup_0/1 are written on allocation below but never read
  // or returned to s_invalid in this file — confirm they are still needed.
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid))) //TODO: clock gate

  // bitmask of beats still to be sent on channel C (one bit per refill beat);
  // set on allocation, cleared beat-by-beat as mem_release fires
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr (three copies to reduce fanout into the TL builders)
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  //val s_data_merge = RegInit(true.B)

  // there are valid request that can be sent to release bus
  //val busy = remain.orR && s_data_override && s_data_merge // have remain beats and data write finished
  val busy = remain.orR && s_data_override  // have remain beats and data write finished
  val req = Reg(new WritebackReqWodata)

  // assign default signals to output signals
  // (Chisel last-connect semantics: `when` blocks below override these)
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr

  s_data_override := true.B // data_override takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }


  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  io.req.ready := state === s_invalid
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    // open the 1-cycle window during which the data register is written below
    s_data_override := false.B
    // only update paddr when allocate a new missqueue entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr

    // all beats pending when the message carries data, else a single "beat"
    remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    state      := s_release_req
    state_dup_0 := s_release_req
    state_dup_1 := s_release_req
    state_dup_for_mp.foreach(_ := s_release_req)
  }

  // --------------------------------------------------------------------------------
  // while there beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  // slice the line register into beat-sized chunks
  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }

  // Four candidate C-channel messages; one is selected below based on
  // req.voluntary / req.hasData.
  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )
  probeResponse.corrupt := req.corrupt

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat),
    corrupt = req.corrupt
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2
  voluntaryRelease.corrupt := req.corrupt

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat),
    corrupt = req.corrupt
  )._2

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  // invariant: a dirty line must carry data
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  // retire the beat just sent
  when (io.mem_release.fire) {remain_clr := PriorityEncoderOH(remain_dup_1)}

  // release_done pulses when the last beat of the C-channel message fires
  val (_, _, release_done, _) = edge.count(io.mem_release)

  when(state === s_release_req && release_done){
    // voluntary Releases must wait for ReleaseAck; ProbeAcks are done now
    state := Mux(req.voluntary, s_release_resp, s_invalid)
    when(req.voluntary){
      state_dup_for_mp.foreach(_ := s_release_resp)
    } .otherwise{
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  io.primary_ready := state === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire) {
      state := s_invalid
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  // data update logic
  // capture the one-cycle-delayed request data right after allocation;
  // s_data_override is low only in that cycle (set low by alloc, defaulted
  // high every cycle above)
  when(!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }

  // assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}
313
// Top-level writeback queue: a bank of WritebackEntry modules plus the glue
// that allocates requests to a free entry, serializes their Release/ProbeAck
// messages onto the TileLink C channel, and routes ReleaseAcks from the D
// channel back to the entry (by source id) that issued the Release.
class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool())) // duplicated ready, fanout reduction
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    //val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    //val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    // 5 miss_req to check: 3*LoadPipe + 1*MainPipe + 1*missReqArb_out
    val miss_req_conflict_check = Vec(LoadPipelineWidth + 2, Flipped(Valid(UInt())))
    val block_miss_req = Vec(LoadPipelineWidth + 2, Output(Bool()))
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  // allocation is possible iff at least one entry is idle
  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val alloc = Cat(primary_ready_vec).orR

  val req = io.req
  val block_conflict = Wire(Bool())

  // refuse a new request while some entry already holds the same block address
  req.ready := alloc && !block_conflict

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // delay data write in writeback req for 1 cycle; entries capture it one
  // cycle after allocation (see WritebackEntry.s_data_override)
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      // priority chain: entry i allocates only if no lower-indexed entry is free
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      // source ids used by this queue start at releaseIdBase
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := req.valid && !block_conflict
      primary_ready_vec(i)   := entry.io.primary_ready
      entry.io.req.bits  := req.bits
      entry.io.req_data  := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      // route the ReleaseAck to the entry whose source id matches
      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
      //when (i.U === io.mem_grant.bits.source) {
      //  io.mem_grant.ready := entry.io.mem_grant.ready
      //}
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR && !block_conflict
  }

  // NOTE(review): grants are accepted unconditionally; the per-entry
  // mem_grant.ready handshakes are not forwarded upward — confirm a
  // ReleaseAck can always be sunk in one cycle.
  io.mem_grant.ready := true.B
  // block an incoming writeback req whose address matches a busy entry
  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  // likewise block miss requests that collide with an in-flight writeback
  val miss_req_conflict = io.miss_req_conflict_check.map{ r =>
    VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === r.bits)).asUInt.orR
  }
  io.block_miss_req.zipWithIndex.foreach{ case(blk, i) =>
    blk := io.miss_req_conflict_check(i).valid && miss_req_conflict(i)
  }

  // round-robin arbitration of all entries onto the shared C channel
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when(io.req.fire) {
    io.req.bits.dump()
  }

  // BUGFIX: dump the C-channel message that actually fired; previously this
  // dumped io.mem_grant.bits, which is unrelated to mem_release.fire and may
  // not even be valid in that cycle.
  when(io.mem_release.fire){
    io.mem_release.bits.dump
  }

  // when (io.miss_req.valid) {
  //   XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  // }

  // when (io.block_miss_req) {
  //   XSDebug("block_miss_req\n")
  // }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  // occupancy histogram (quartiles of busy entries)
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()

}
425}