xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/Uncache.scala (revision 0a84afd5dd089307b667fd1d24e3b3b50a5ad80b)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import coupledL2.MemBackTypeMM
import coupledL2.MemPageTypeNC
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
import coupledL2.{MemBackTypeMMField, MemPageTypeNCField}

class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  val resp_nderr = Bool()

  /* NOTE: uncomment these fields if the internal forward logic is ever supported */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    memBackTypeMM := x.memBackTypeMM
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  def toUncacheWordResp(): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "Received an uncache response for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// convert DCacheIO to TileLink
// For now, we only deal with TL-UL.

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    )),
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )
  val clientNode = TLClientNode(Seq(clientParameters))
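  // The sourceId range above equals UncacheBufferSize, so each buffer entry owns a
  // dedicated TileLink source id. A D-channel response can therefore be routed back
  // to its entry by its source field alone, without a separate allocation table.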

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UInt, y: UInt): Bool = x(PAddrBits - 1, 3) === y(PAddrBits - 1, 3)
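  // All addrMatch variants above compare bits (PAddrBits-1, 3), i.e. the address of
  // the aligned 8-byte double word. This matches the XLEN-wide data/mask tracked per
  // entry: two accesses are treated as "the same address" if they touch the same
  // double word, regardless of the byte offset within it.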

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f1_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }
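  // With outstanding disabled, the FSM above serializes the buffer to one request
  // at a time: s_idle -> s_refill_req (issue on channel A) -> s_refill_resp (wait
  // for channel D) -> s_send_resp (return to LSQ) -> s_idle. When
  // io.enableOutstanding is set, the send logic below ignores uState and several
  // entries may be in flight at once.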


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
    1. same addr
    2. same cmd
    3. valid
    FIXME lyq: no merging for now, due to the following issues
    1. loads cannot be merged
    2. it is unclear how to merge stores and their responses precisely
  */

  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits
  /**
    TODO lyq: block, wait, or forward?
    NOW: strictly block on a same-address match; otherwise an exhaustive case analysis would be needed:
      - ld->ld wait
      - ld->st forward
      - st->ld forward
      - st->st block
  */
  val e0_existSame = sizeMap(j => e0_req_valid && states(j).isValid() && addrMatch(e0_req, entries(j))).asUInt.orR
  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
  req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // judge whether to wait on the same block: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req_valid && states(j).isValid() && states(j).isInflight() && addrMatch(e0_req, entries(j))
    )
    val waitQ0 = q0_canSent && addrMatch(e0_req, q0_entry)
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }
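  // A newly allocated entry that aliases (same double word as) an entry already in
  // flight, or the entry being selected for sending this cycle (q0), is marked
  // waitSame: it stays in the buffer and cannot be issued until the earlier access
  // returns and clears the waitSame bits in the grant handler below.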


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which entry to send
   *    q0: send it
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" are not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))
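  // TL-UL transfers cover a power-of-two number of bytes, so the byte mask is
  // converted to a log2 size: PopCount(mask) of 1/2/4/8 maps to lgSize 0/1/2/3
  // (2^lgSize bytes). Any other popcount has no legal encoding and trips the
  // assertion above.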

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block
    (0 until UncacheBufferSize).map(j =>
      when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }
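  // Once the A-channel request fires, any valid entry that aliases q0's double word
  // and is not already waiting to return is marked waitSame, so it will not be
  // issued while this access is outstanding.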


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).map(j =>
      when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }
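  // The grant's source field is the buffer index chosen when the request was sent,
  // so the response data / denied flag can be written straight into that entry.
  // Aliasing entries that were held back with waitSame are released here and become
  // eligible to send again.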


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain the store queue and the uncache buffer
   * 2. when io.lsq.req.bits.atomic is true: not supported for now
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty
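  // Note: io.flush.valid is not consumed inside this module; the buffer only reports
  // emptiness. Draining is presumably driven by the requester, which stops sending
  // new requests and waits for io.flush.empty while entries already in the buffer
  // complete normally.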


  /******************************************************************
   * Load Data Forward
   *
   * 0. ld in ldu pipeline
   *    f0: vaddr match, mask & data select, fast resp
   *    f1: paddr match, resp
   *
   * 1. ld in buffer (in "Enter Buffer")
   *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *    NOW: same addresses are strictly blocked, so no such forwarding occurs.
   *
   ******************************************************************/

  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty
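  // Forwarding is checked in two stages per load port below: f0 matches on vaddr for
  // a fast mask response, f1 re-checks the same entries against the later-arriving
  // paddr. If the two match vectors disagree on any port, matchInvalid is raised and
  // do_uarch_drain (RegNext of f1_needDrain) blocks new allocations via req_ready in
  // the following cycle.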

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    // f0 vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_validTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid)
    // f0 select
    val f0_fwdMask = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    // f1 paddr match
    val f1_fwdMask = RegEnable(f0_fwdMask, f0_fwdValid)
    val f1_fwdData = RegEnable(f0_fwdData, f0_fwdValid)
    // forward.paddr from dtlb, which is far from uncache
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), RegEnable(forward.paddr, f0_fwdValid)))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    when(f1_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        f1_ptagMatches.asUInt,
        RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
        RegEnable(forward.vaddr, f0_fwdValid),
        RegEnable(forward.paddr, f0_fwdValid)
      )
    }
    // f1 output
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  //  End
}