xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision 066ac8a465b27b54ba22458ff1a67bcd28215d73)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22
23import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}
24
25import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle}
26
// A Probe request from L2, translated from the TileLink channel-B message into
// the DCache's internal representation. NOTE: field order defines the hardware
// bit layout of this Bundle — do not reorder.
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  // TileLink source id of the original channel-B message (width inferred)
  val source = UInt()
  // TileLink opcode of the original channel-B message (width inferred)
  val opcode = UInt()
  // physical address of the block being probed
  val addr   = UInt(PAddrBits.W)
  // TODO: l2 should use vaddr index to probe l1
  // virtual address, used to derive the (possibly aliased) cache index
  val vaddr  = UInt(VAddrBits.W)
  // requested permission downgrade (TLPermissions cap: toN / toB / toT)
  val param  = UInt(TLPermissions.bdWidth.W)
  // whether the probe response must carry the block's data
  val needData = Bool()

  // Debug helper: print this request's fields (vaddr/needData not printed).
  def dump() = {
    XSDebug("ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}
42
// Holds a single in-flight probe request and forwards it to the main pipeline.
// A two-state FSM: idle (s_invalid) -> trying to issue (s_pipe_req) -> idle.
// While non-idle, the entry advertises the probed block address so ProbeQueue
// can detect (and assert on) duplicate probes to the same block.
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    // incoming probe request, already translated to the internal ProbeReq form
    val req = Flipped(Decoupled(new ProbeReq))
    // request issued to the main pipeline
    val pipe_req  = DecoupledIO(new MainPipeReq)
    // address currently protected by an LR/SC reservation; probes to it stall
    val lrsc_locked_block = Input(Valid(UInt()))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: Nil = Enum(2)

  val state = RegInit(s_invalid)

  // latched copy of the request being serviced
  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  // debug trace while a probe is in flight
  // (was two redundant prints of the same condition; consolidated into one)
  when (state =/= s_invalid) {
    XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
  }

  // s_invalid: accept a new request and latch it
  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire()) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // s_pipe_req: try to issue to the main pipeline, unless the block is
  // protected by an LR/SC reservation
  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    val lrsc_blocked = io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData

    when (io.pipe_req.fire()) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire())
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr)
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
109
// Buffers incoming TileLink channel-B Probe messages in nProbeEntries
// ProbeEntry slots and arbitrates their requests into the main pipeline.
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    // channel-B probe messages from L2
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    // arbitrated probe request to the main pipeline
    val pipe_req  = DecoupledIO(new MainPipeReq)
    // LR/SC reservation address, fanned out to every entry
    val lrsc_locked_block = Input(Valid(UInt()))
    // pulses when the reservation set is updated; blocks all probes next cycle
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  // L2 encodes "response must carry data" in data bit 0
  req.needData := io.mem_probe.bits.data(0)

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)

    // entry req: route the incoming probe to the lowest-indexed free entry
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  io.pipe_req <> pipe_req_arb.io.out
  // When update_resv_set is asserted, block all probe reqs in the next cycle.
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  when(RegNext(io.update_resv_set)){
    io.pipe_req.valid := false.B
    pipe_req_arb.io.out.ready := false.B
  }

  // print all input/output requests for debug purpose
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.mem_probe.bits.address)).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  when (io.mem_probe.fire()) {
    XSDebug("mem_probe: ")
    io.mem_probe.bits.dump
  }

//  when (io.pipe_req.fire()) {
//    io.pipe_req.bits.dump()
//  }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  // Occupancy is bucketed into quartiles. The shared PopCount is hoisted into
  // one val (was repeated six times). Bucket 1 now uses <= so that an
  // occupancy of exactly nProbeEntries/4 is counted (previously it fell in no
  // bucket: bucket 1 was "< n/4" and bucket 2 was "> n/4").
  val num_valid_entries = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_probq_req          ", io.pipe_req.fire()                                                                                       ),
    ("dcache_probq_1/4_valid    ", (num_valid_entries <= (cfg.nProbeEntries.U/4.U))                                                         ),
    ("dcache_probq_2/4_valid    ", (num_valid_entries > (cfg.nProbeEntries.U/4.U)) & (num_valid_entries <= (cfg.nProbeEntries.U/2.U))       ),
    ("dcache_probq_3/4_valid    ", (num_valid_entries > (cfg.nProbeEntries.U/2.U)) & (num_valid_entries <= (cfg.nProbeEntries.U*3.U/4.U))   ),
    ("dcache_probq_4/4_valid    ", (num_valid_entries > (cfg.nProbeEntries.U*3.U/4.U))                                                      ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}
209