/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut, TLPermissions}
import huancun.{DirtyField, DirtyKey}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param  = UInt(TLPermissions.cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

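  // delay_release marks a release that must first sleep in s_sleep; it is woken
  // up by release_wakeup when the refill identified by miss_id completes.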
  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps in s_sleep and waits for a refill to wake it up,
// the main pipe may update the block's meta & data in the meantime.
// So the meta & data to be released need to be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only stores are considered here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReq))
    val merge = Output(Bool())
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

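  // Entry FSM (transitions as implemented below):
  //   s_invalid      -> s_sleep        (req accepted with delay_release set)
  //   s_invalid      -> s_release_req  (req accepted, release immediately)
  //   s_sleep        -> s_release_req  (merged with a probe, or woken up by release_wakeup)
  //   s_release_req  -> s_release_resp (voluntary release sent, wait for ReleaseAck)
  //   s_release_req  -> s_invalid      (probe response sent, no ack expected)
  //   s_release_resp -> s_invalid      (ReleaseAck received)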
  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats
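  // remain is a bitmap with one bit per beat of the block: bits are set when a
  // request starts sending and cleared one by one as beats go out on channel C.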
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr

  val busy = remain.orR

  val req  = Reg(new WritebackReq)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

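  // Merge updated store data into the line at 64-bit bank granularity: each set
  // bit of wmask replaces the corresponding 64-bit bank of old_data with new_data.
  // E.g. wmask = "b0010".U keeps all banks except bank 1 (bits 127:64), which is
  // taken from new_data.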
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.fire()) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to signal that this entry may go on releasing
  val merge_probe = WireInit(false.B)
  io.merge := false.B
  when (state === s_sleep) {
    assert (remain === 0.U)

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }

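    // A Probe to the same address may be merged into this sleeping voluntary
    // Release: the entry turns into a probe response (voluntary := false.B) and
    // releases right away, so no separate voluntary release is needed afterwards.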
    io.merge := !io.req.bits.voluntary && io.req.bits.addr === req.addr
    merge_probe := io.req.valid && io.merge
    when (merge_probe) {
      state := s_release_req
      req.voluntary := false.B
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }.elsewhen (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      remain_set := Mux(req.hasData || update && io.release_update.bits.mask.orR, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // While there are beats remaining to be sent, keep sending them.
  // Which beat should be sent in this cycle?
  val beat = PriorityEncoder(remain)

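  // Slice the cache line into refillCycles beats of beatBits bits each.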
  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

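  // Pass the dirty hint downstream (to huancun) through the user-defined DirtyKey
  // echo field on the C channel, if the bundle carries one.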
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

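  // release_done is asserted in the cycle the last beat of the message is accepted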
  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire()) {
      state := s_invalid
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free.
  // 2. When this entry wants to release but is still waiting for the release_wakeup
  //    signal, and a probe req to the same addr comes in. In this case the probe is
  //    merged with the release: handling the probe means no separate release is needed.
  io.req.ready := state === s_invalid ||
    state === s_sleep && !io.req.bits.voluntary && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req  = Flipped(Valid(UInt()))
    val block_miss_req  = Output(Bool())
  })

  // allocate a free entry for an incoming request
  val primary_ready  = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val merge_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val merge = merge_vec.asUInt.orR
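  // An entry that can merge the req (a probe hitting its sleeping release) takes
  // priority over allocating a fresh entry.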
  val alloc_idx = PriorityEncoder(Mux(merge, merge_vec, primary_ready))

  val req = io.req
  val block_conflict = Wire(Bool())
  val accept = merge || allocate && !block_conflict
  req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  require(isPow2(cfg.nMissEntries))
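  // Entry ids start at releaseIdBase, so the low bits of the D-channel source
  // select which writeback entry a ReleaseAck belongs to.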
  val grant_source = io.mem_grant.bits.source(log2Up(cfg.nReleaseEntries) - 1, 0)
  val entries = (0 until cfg.nReleaseEntries) map { i =>
    val entry = Module(new WritebackEntry(edge))

    entry.io.id := (i + releaseIdBase).U

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && req.valid && accept
    primary_ready(i)   := entry.io.req.ready
    merge_vec(i) := entry.io.merge
    entry.io.req.bits  := req.bits

    entry.io.mem_grant.valid := (i.U === grant_source) && io.mem_grant.valid
    entry.io.mem_grant.bits  := io.mem_grant.bits
    when (i.U === grant_source) {
      io.mem_grant.ready := entry.io.mem_grant.ready
    }

    entry.io.release_wakeup := io.release_wakeup
    entry.io.release_update := io.release_update

    entry
  }

  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

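  // Round-robin arbitration of all entries' C-channel messages onto mem_release.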
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfinfo = IO(new Bundle() {
    val perfEvents = Output(new PerfEventsBundle(5))
  })
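  // Occupancy profiling: count cycles in which the number of valid entries falls
  // into (roughly) each quartile of nReleaseEntries.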
  val wbq_valid_count = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_wbq_req          ", io.req.fire()),
    ("dcache_wbq_1/4_valid    ", wbq_valid_count < (cfg.nReleaseEntries.U / 4.U)),
    ("dcache_wbq_2/4_valid    ", wbq_valid_count > (cfg.nReleaseEntries.U / 4.U) && wbq_valid_count <= (cfg.nReleaseEntries.U / 2.U)),
    ("dcache_wbq_3/4_valid    ", wbq_valid_count > (cfg.nReleaseEntries.U / 2.U) && wbq_valid_count <= (cfg.nReleaseEntries.U * 3.U / 4.U)),
    ("dcache_wbq_4/4_valid    ", wbq_valid_count > (cfg.nReleaseEntries.U * 3.U / 4.U))
  )

  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}