/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
import xiangshan._
import xiangshan.mem._
import xiangshan.mem.Bundles._
import coupledL2.{MemBackTypeMM, MemBackTypeMMField, MemPageTypeNC, MemPageTypeNCField}

trait HasUncacheBufferParameters extends HasXSParameter with HasDCacheParameters {

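  // Merge two (data, mask) pairs byte by byte: wherever newMask selects a
  // byte, take it from newData, otherwise keep the old byte; the merged mask
  // is the union of both masks.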
  def doMerge(oldData: UInt, oldMask: UInt, newData: UInt, newMask: UInt): (UInt, UInt) = {
    val resData = VecInit((0 until DataBytes).map(j =>
      Mux(newMask(j), newData(8*(j+1)-1, 8*j), oldData(8*(j+1)-1, 8*j))
    )).asUInt
    val resMask = newMask | oldMask
    (resData, resMask)
  }

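  // A buffer "block" is one XLEN-wide (XLEN/8-byte) word; addresses are
  // compared at this granularity for merging and same-block ordering.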
  def INDEX_WIDTH = log2Up(UncacheBufferSize)
  def BLOCK_OFFSET = log2Up(XLEN / 8)
  def getBlockAddr(x: UInt) = x >> BLOCK_OFFSET
}

abstract class UncacheBundle(implicit p: Parameters) extends XSBundle with HasUncacheBufferParameters

abstract class UncacheModule(implicit p: Parameters) extends XSModule with HasUncacheBufferParameters


class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

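// One slot of the uncache buffer: the (possibly merged) request payload plus
// the response metadata collected from the bus.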
class UncacheEntry(implicit p: Parameters) extends UncacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  val resp_nderr = Bool()

  /* NOTE: uncomment these fields if internal forwarding is ever supported */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    nc := x.nc
    memBackTypeMM := x.memBackTypeMM
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  def update(x: UncacheWordReq): Unit = {
    val (resData, resMask) = doMerge(data, mask, x.data, x.mask)
    // realign the address to the lowest set bit of the merged mask
    val (resOffset, resFlag) = PriorityEncoderWithFlag(resMask)
    data := resData
    mask := resMask
    when(resFlag){
      addr := (getBlockAddr(addr) << BLOCK_OFFSET) | resOffset
      vaddr := (getBlockAddr(vaddr) << BLOCK_OFFSET) | resOffset
    }
  }

  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied || x.corrupt
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  def toUncacheWordResp(eid: UInt): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := eid
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = valid && inflight
  def isWaitReturn(): Bool = valid && waitReturn
  def isWaitSame(): Bool = valid && waitSame
  def can2Bus(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "received an uncache response for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// Converts DCacheIO to TileLink.
// For now, we only deal with TL-UL.

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    )),
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasUncacheBufferParameters
  with HasPerfEvents
{
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

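  // entries and states are parallel arrays: entries holds each request's
  // payload, while states tracks the slot through
  // valid (-> waitSame) -> inflight -> waitReturn.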
  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_inflight :: s_wait_return :: Nil = Enum(3)
  val uState = RegInit(s_idle)

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f1_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * Functions
   ******************************************************************/
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def sizeForeach(f: Int => Unit): Unit = (0 until UncacheBufferSize).foreach(f)
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UInt, y: UInt): Bool = getBlockAddr(x) === getBlockAddr(y)

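  // A merged mask is acceptable only if it describes a single naturally
  // aligned power-of-two access (1, 2, 4 or 8 contiguous bytes), because it
  // is later issued as one TileLink Get/Put of that size. For example,
  // 0b00001100 (an aligned halfword) can be sent, while 0b00000110 cannot
  // and therefore must not be produced by a merge.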
  def continueAndAlign(mask: UInt): Bool = {
    val res =
      PopCount(mask) === 1.U ||
      mask === 0b00000011.U ||
      mask === 0b00001100.U ||
      mask === 0b00110000.U ||
      mask === 0b11000000.U ||
      mask === 0b00001111.U ||
      mask === 0b11110000.U ||
      mask === 0b11111111.U
    res
  }

  def canMergePrimary(x: UncacheWordReq, e: UncacheEntry): Bool = {
    // vaddr same, properties same
    getBlockAddr(x.vaddr) === getBlockAddr(e.vaddr) &&
      x.cmd === e.cmd && x.nc && e.nc &&
      x.memBackTypeMM === e.memBackTypeMM && !x.atomic && !e.atomic &&
      continueAndAlign(x.mask | e.mask)
  }

  def canMergeSecondary(eid: UInt): Bool = {
    // the target entry must not be inflight and must not be picked to send this cycle
    !states(eid).isInflight() && !(q0_canSent && q0_canSentIdx === eid)
  }

  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

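  // With outstanding accesses disabled, this FSM serializes the buffer: a new
  // request may be issued to the bus only in s_idle, so at most one request
  // is outstanding between mem_acquire.fire and resp.fire.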
  switch(uState){
    is(s_idle){
      when(mem_acquire.fire){
        uState := s_inflight
      }
    }
    is(s_inflight){
      when(mem_grant.fire){
        uState := s_wait_return
      }
    }
    is(s_wait_return){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    e0: resolved in one cycle to preserve the original performance
   *    e1: return idResp to set sid for the handshake
   ******************************************************************/

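  // e0 outcome per incoming request: merge into an address-matching entry
  // when allowed; otherwise allocate a free entry (tagged waitSame if a valid
  // entry to the same block already exists); otherwise reject by holding
  // req.ready low, so the LSQ keeps the request and retries later.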
  /* e0: merge/alloc */
  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits

  val e0_rejectVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_mergeVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_allocWaitSameVec = Wire(Vec(UncacheBufferSize, Bool()))
  sizeForeach(i => {
    val valid = e0_req_valid && states(i).isValid()
    val isAddrMatch = addrMatch(e0_req, entries(i))
    val canMerge1 = canMergePrimary(e0_req, entries(i))
    val canMerge2 = canMergeSecondary(i.U)
    e0_rejectVec(i) := valid && isAddrMatch && !canMerge1
    e0_mergeVec(i) := valid && isAddrMatch && canMerge1 && canMerge2
    e0_allocWaitSameVec(i) := valid && isAddrMatch && canMerge1 && !canMerge2
  })
  assert(PopCount(e0_mergeVec) <= 1.U, "Uncache buffer should not merge multiple entries")

  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val e0_reject = do_uarch_drain || !e0_invalidVec.asUInt.orR || e0_rejectVec.reduce(_ || _)
  val (e0_mergeIdx, e0_canMerge) = PriorityEncoderWithFlag(e0_mergeVec)
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_allocWaitSame = e0_allocWaitSameVec.reduce(_ || _)
  val e0_sid = Mux(e0_canMerge, e0_mergeIdx, e0_allocIdx)

  // gating on e0_fire guarantees that a rejected request never updates an entry
  when(e0_canMerge && e0_fire){
    entries(e0_mergeIdx).update(e0_req)
  }.elsewhen(e0_canAlloc && e0_fire){
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)
    when(e0_allocWaitSame){
      states(e0_allocIdx).setWaitSame(true.B)
    }
  }

  req_ready := !e0_reject

  /* e1: return accept */
  io.lsq.idResp.valid := RegNext(e0_fire)
  io.lsq.idResp.bits.mid := RegEnable(e0_req.id, e0_fire)
  io.lsq.idResp.bits.sid := RegEnable(e0_sid, e0_fire)
  io.lsq.idResp.bits.is2lq := RegEnable(!isStore(e0_req.cmd), e0_fire)
  io.lsq.idResp.bits.nc := RegEnable(e0_req.nc, e0_fire)

  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which entry to send
   *    q0: send it
   *
   *  Version 1 (better performance)
   *    resolved in one cycle to preserve the original performance.
   *    NOTE: "Enter Buffer" and "Uncache Req" are not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_idle) &&
    states(i).can2Bus()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

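  // Translate the byte mask into a TileLink transfer size: the mask must
  // select exactly 1, 2, 4 or 8 bytes, encoded as lgSize 0/1/2/3; the assert
  // below checks that every sendable entry satisfies this.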
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size === m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // mark every other valid entry to the same block as waitSame,
    // so it waits for this request to return before being sent
    (0 until UncacheBufferSize).foreach(j =>
      when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

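  // TL-UL grants are single-beat; refill_done (the "done" flag from
  // edge.addr_inc) asserts on the last beat and is checked below.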
  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // clear waitSame on every entry that was waiting on this block
    (0 until UncacheBufferSize).foreach(j =>
      when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp(r0_canSentIdx)
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain the store queue and the uncache buffer
   * 2. when io.lsq.req.bits.atomic is true: not supported yet
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty



  /******************************************************************
   * Load Data Forward to loadunit
   *  f0: vaddr match, fast resp
   *  f1: mask & data select, merge; paddr match; resp
   *      NOTE: forward.paddr comes from the dtlb, which is far from uncache f0
   ******************************************************************/
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_fwdMaskCandidates = sizeMap(i => RegEnable(entries(i).mask, f0_validMask(i)))
  val f1_fwdDataCandidates = sizeMap(i => RegEnable(entries(i).data, f0_validMask(i)))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
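  // If any forward port sees its vaddr and paddr CAM results disagree, the
  // match is unreliable (likely a virtual alias), so request a drain and let
  // the load be replayed against an empty buffer.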
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    /* f0 */
    // vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_flyTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).inflight)
    val f0_idleTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && !states(w).inflight)
    // fast path ONLY, used to get better timing
    val f0_flyMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_flyTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_idleMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_idleTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))

    /* f1 */
    val f1_flyTagMatches = RegEnable(f0_flyTagMatches, f0_fwdValid)
    val f1_idleTagMatches = RegEnable(f0_idleTagMatches, f0_fwdValid)
    val f1_fwdPAddr = RegEnable(forward.paddr, f0_fwdValid)
    // select
    val f1_flyMask = Mux1H(f1_flyTagMatches, f1_fwdMaskCandidates)
    val f1_flyData = Mux1H(f1_flyTagMatches, f1_fwdDataCandidates)
    val f1_idleMask = Mux1H(f1_idleTagMatches, f1_fwdMaskCandidates)
    val f1_idleData = Mux1H(f1_idleTagMatches, f1_fwdDataCandidates)
    // merge old (inflight) and new (idle) data
    val (f1_fwdDataTmp, f1_fwdMaskTmp) = doMerge(f1_flyData, f1_flyMask, f1_idleData, f1_idleMask)
    val f1_fwdMask = shiftMaskToHigh(f1_fwdPAddr, f1_fwdMaskTmp).asTypeOf(Vec(VDataBytes, Bool()))
    val f1_fwdData = shiftDataToHigh(f1_fwdPAddr, f1_fwdDataTmp).asTypeOf(Vec(VDataBytes, UInt(8.W)))
    // check paddr match and judge vaddr/paddr mismatch
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), f1_fwdPAddr))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    when(f1_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        f1_ptagMatches.asUInt,
        RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
        RegEnable(forward.vaddr, f0_fwdValid),
        RegEnable(forward.paddr, f0_fwdValid)
      )
    }
    // response
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_flyMaskFast(j) || f0_idleMaskFast(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("e0_reject", e0_reject && e0_req_valid)
  XSPerfAccumulate("e0_total_enter", e0_fire)
  XSPerfAccumulate("e0_merge", e0_fire && e0_canMerge)
  XSPerfAccumulate("e0_alloc_simple", e0_fire && e0_canAlloc && !e0_allocWaitSame)
  XSPerfAccumulate("e0_alloc_wait_same", e0_fire && e0_canAlloc && e0_allocWaitSame)
  XSPerfAccumulate("q0_acquire", q0_canSent)
  XSPerfAccumulate("q0_acquire_store", q0_canSent && q0_isStore)
  XSPerfAccumulate("q0_acquire_load", q0_canSent && !q0_isStore)
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_idle && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_idle && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  //  End
}
595