// xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision 51e45dbbf87325e45ff2af6ca86ed6c7eed04464)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
23import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}
24
/** Internal representation of a TileLink channel-B Probe request.
  *
  * Built by [[ProbeQueue]] from an incoming `TLBundleB` and handed to a
  * [[ProbeEntry]], which turns it into a `MainPipeReq`.
  */
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()            // TileLink source ID of the original B-channel request
  val opcode = UInt()            // TileLink B-channel opcode (ProbeQueue asserts it is Probe)
  val addr   = UInt(PAddrBits.W) // physical address of the probed block
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param  = UInt(TLPermissions.bdWidth.W) // requested permission change (toN/toB/toT encoding)
  val needData = Bool()          // whether the probe ack must carry data (ProbeAckData)

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  // Debug pretty-printer; note it prints only source/opcode/addr/param
  // (vaddr, needData and id are omitted).
  def dump() = {
    XSDebug("ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}
42
/** Response from the main pipeline back to the probe queue.
  * Carries only the entry ID so the matching [[ProbeEntry]] can retire.
  */
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}
47
/** One probe-queue slot.
  *
  * Tracks a single [[ProbeReq]] from allocation until the main pipeline has
  * accepted and acknowledged it, using a 3-state FSM:
  *   - s_invalid:   free; ready to accept a new request
  *   - s_pipe_req:  trying to issue the probe into the main pipeline
  *   - s_wait_resp: issued; waiting for a pipe_resp with a matching id
  */
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    // block currently reserved by LR/SC; probes to it must be held off
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  // latched request for the lifetime of the entry
  val req = Reg(new ProbeReq)

  // assign default values to signals
  // (overridden below inside `when` blocks; Chisel last-connect wins)
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  // expose the busy block so ProbeQueue can assert L2 never re-probes it
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  when (state =/= s_invalid) {
    XSDebug("state: %d\n", state)
  }

  when (state =/= s_invalid) {
    XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
  }

  // s_invalid: accept a new request and latch it
  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // True when the (incoming or latched) probe targets the LR/SC-reserved block.
  // On the allocation cycle (io.req.fire) the request register is not yet
  // written, so compare against io.req.bits instead of req.
  val lrsc_blocked = Mux(
    io.req.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(io.req.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr)
  )

  // s_pipe_req: present the probe to the main pipeline unless LR/SC-blocked
  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.error := false.B
    pipe_req.id := io.id

    when (io.pipe_req.fire) {
      state := s_wait_resp
    }
  }

  // s_wait_resp: free the entry when the pipeline response carries our id
  when (state === s_wait_resp) {
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire)
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr))
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
129
/** Queue of in-flight L2->L1 probes.
  *
  * Accepts TileLink channel-B Probe messages, translates each into a
  * [[ProbeReq]], allocates it to a free [[ProbeEntry]], arbitrates the
  * entries' pipeline requests, and forwards the winner to the main pipeline
  * one cycle later (registered for timing).
  */
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    // block currently reserved by LR/SC (forwarded to every entry)
    val lrsc_locked_block = Input(Valid(UInt()))
    // pulses when the reservation set is updated; blocks probes next cycle
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    // NOTE(review): the alias bits ride in the low bits of the B-channel
    // data field (see alias_addr_frag above); they are spliced into the
    // index portion of the reconstructed vaddr.
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  // bit 0 of the B-channel data field encodes whether probe ack needs data
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req: only the priority-selected free entry sees valid
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp: an entry is retired as soon as its request is accepted by
    // the pipeline (fire), matched by id
    entry.io.pipe_resp.valid := io.pipe_req.fire
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire)
  // LR/SC comparison: use the arbiter output on the cycle it fires (the
  // register is not yet updated), the held register otherwise
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(pipe_req_arb.io.out.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(selected_req_bits.addr) && selected_req_valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  // Chisel last-connect: if the arbiter fires on the same cycle the
  // downstream request fires, the later `when` wins and the register
  // stays valid for the newly selected request.
  when(io.pipe_req.fire){
    selected_req_valid := false.B
  }
  when(pipe_req_arb.io.out.fire){
    selected_req_valid := true.B
  }

  // print all input/output requests for debug purpose
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && get_block(e.io.block_addr.bits) === get_block(io.mem_probe.bits.address))).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  when (io.mem_probe.fire) {
    XSDebug("mem_probe: ")
    io.mem_probe.bits.dump
  }

//  when (io.pipe_req.fire) {
//    io.pipe_req.bits.dump()
//  }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }
  XSPerfAccumulate("ProbeL1DCache", io.mem_probe.fire)

  // occupancy histogram buckets for the performance-event interface
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire),
    ("dcache_probq_1_4_valid", (perfValidCount < (cfg.nProbeEntries.U/4.U))),
    ("dcache_probq_2_4_valid", (perfValidCount > (cfg.nProbeEntries.U/4.U)) & (perfValidCount <= (cfg.nProbeEntries.U/2.U))),
    ("dcache_probq_3_4_valid", (perfValidCount > (cfg.nProbeEntries.U/2.U)) & (perfValidCount <= (cfg.nProbeEntries.U*3.U/4.U))),
    ("dcache_probq_4_4_valid", (perfValidCount > (cfg.nProbeEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}
245