xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/MainPipe.scala (revision 708ceed4afe43fb0ea3a52407e46b2794c573634)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.MemoryOpCategories._
import freechips.rocketchip.tilelink.TLPermissions._
import utils._
import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions}

class MainPipeReq(implicit p: Parameters) extends DCacheBundle
{
  // for request that comes from MissQueue
  // does this req come from MissQueue
  val miss = Bool()
  // which MissQueueEntry sent this req?
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
  // what permission are we granted with?
  val miss_param = UInt(TLPermissions.bdWidth.W)
  // whether the grant data is dirty
  val miss_dirty = Bool()

  // for request that comes from Probe
  // does this req come from Probe
  val probe = Bool()
  val probe_param = UInt(TLPermissions.bdWidth.W)
  val probe_need_data = Bool()

  // request info
  // reqs from MissQueue, Store, AMO use this
  // probe does not use this
  val source = UInt(sourceTypeWidth.W)
  val cmd    = UInt(M_SZ.W)
  // if dcache size > 32KB, vaddr is also needed for store
  // vaddr is used to get extra index bits
  val vaddr  = UInt(VAddrBits.W)
  // must be aligned to block
  val addr   = UInt(PAddrBits.W)

  // store
  val store_data   = UInt((cfg.blockBytes * 8).W)
  val store_mask   = UInt(cfg.blockBytes.W)

  // which word does amo work on?
  val word_idx = UInt(log2Up(cfg.blockBytes * 8 / DataBits).W)
  val amo_data   = UInt(DataBits.W)
  val amo_mask   = UInt((DataBits/8).W)

  val id     = UInt(reqIdWidth.W)

  def dump() = {
    XSDebug("MainPipeReq: miss: %b miss_id: %d miss_param: %d probe: %b probe_param: %d source: %d cmd: %d addr: %x store_data: %x store_mask: %x word_idx: %d data: %x mask: %x id: %d\n",
      miss, miss_id, miss_param, probe, probe_param, source, cmd, addr, store_data, store_mask, word_idx, amo_data, amo_mask, id)
  }
}
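
// A rough summary of who sends a MainPipeReq and which fields each sender
// uses, inferred from the field comments above:
//   - MissQueue refill: miss = true; uses miss_id / miss_param / miss_dirty,
//     with store_data / store_mask carrying the full refill line.
//   - Probe: probe = true; uses probe_param / probe_need_data.
//   - Store / AMO: uses source, cmd, addr / vaddr, and the store_* / amo_*
//     fields respectively.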

class MainPipeResp(implicit p: Parameters) extends DCacheBundle
{
  val id     = UInt(reqIdWidth.W)
  // AMO resp data
  val data   = UInt(DataBits.W)
  val miss   = Bool()
  val replay = Bool()
  def dump() = {
    XSDebug("MainPipeResp: id: %d data: %x miss: %b replay: %b\n",
      id, data, miss, replay)
  }
}

class MainPipe(implicit p: Parameters) extends DCacheModule {
  def metaBits = (new L1Metadata).getWidth
  def encMetaBits = cacheParams.tagCode.width(metaBits)

  val io = IO(new DCacheBundle {
    // req and resp
    val req        = Flipped(DecoupledIO(new MainPipeReq))
    val miss_req   = DecoupledIO(new MissReq)
    val miss_resp  = ValidIO(new MainPipeResp)
    val store_resp = ValidIO(new MainPipeResp)
    val amo_resp   = ValidIO(new MainPipeResp)

    // meta/data read/write
    val banked_data_read  = DecoupledIO(new L1BankedDataReadLineReq)
    val banked_data_write = DecoupledIO(new L1BankedDataWriteReq)
    val banked_data_resp = Input(Vec(DCacheBanks, new L1BankedDataReadResult()))

    val meta_read  = DecoupledIO(new L1MetaReadReq)
    val meta_resp  = Input(Vec(nWays, UInt(encMetaBits.W)))
    val meta_write = DecoupledIO(new L1MetaWriteReq)

    // write back
    val wb_req     = DecoupledIO(new WritebackReq)

    // an lrsc-locked block should block probes
    val lrsc_locked_block = Output(Valid(UInt(PAddrBits.W)))

    // update state vec in replacement algo
    val replace_access = Flipped(Vec(LoadPipelineWidth, ValidIO(new ReplacementAccessBundle)))

    // load fast wakeup should be disabled when data read is not ready
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
  })

  def getMeta(encMeta: UInt): UInt = {
    require(encMeta.getWidth == encMetaBits)
    encMeta(metaBits - 1, 0)
  }
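
  // Note: encMetaBits = tagCode.width(metaBits), i.e. the raw meta plus ECC
  // check bits. Assuming the code places the payload in the low-order bits
  // (which getMeta above relies on), the check bits occupy the high-order
  // part and are only consumed by the tag ECC decode in stage 1.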

  // assign default values to output signals
  io.req.ready := false.B
  io.miss_req.valid := false.B
  io.miss_req.bits := DontCare
  io.miss_resp.valid := false.B
  io.store_resp.valid := false.B
  io.amo_resp.valid := false.B

  io.meta_read.valid := false.B
  io.meta_write.valid := false.B
  io.meta_write.bits := DontCare

  io.wb_req.valid := false.B
  io.wb_req.bits := DontCare

  io.lrsc_locked_block.valid := false.B
  io.lrsc_locked_block.bits := DontCare

  // Pipeline
  val s1_s0_set_conflict, s2_s0_set_conflict, s3_s0_set_conflict = Wire(Bool())
  val set_conflict = s1_s0_set_conflict || s2_s0_set_conflict || s3_s0_set_conflict
  val s1_ready, s2_ready, s3_ready = Wire(Bool())
  val s3_valid = RegInit(false.B)
  val update_meta = Wire(Bool())

  // --------------------------------------------------------------------------------
  // stage 0
  // read meta
  val s0_valid = io.req.valid
  val s0_fire = io.req.fire()
  val s0_req = io.req.bits

  val bank_write = VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, s0_req.store_mask).orR)).asUInt
  val bank_full_write = VecInit((0 until DCacheBanks).map(i => get_mask_of_bank(i, s0_req.store_mask).andR)).asUInt
  val banks_full_overwrite = bank_full_write.andR

  val banked_store_rmask = bank_write & ~bank_full_write
  val banked_full_rmask = ~0.U(DCacheBanks.W)
  val banked_none_rmask = 0.U(DCacheBanks.W)
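
  // A worked example (assuming an illustrative config of 8 banks):
  // if store_mask fully covers bank 0 and touches only some bytes of bank 1,
  //   bank_write         = b00000011  (banks with any byte written)
  //   bank_full_write    = b00000001  (banks with every byte written)
  //   banked_store_rmask = b00000010  (only partially-written banks must be
  //                                    read so old and new bytes can be merged)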

  // sanity check
  when (s0_fire) {
    OneHot.checkOneHot(Seq(s0_req.miss, s0_req.probe))
  }
  assert(!RegNext(s0_fire && s0_req.miss && !banks_full_overwrite), "miss req should fully overwrite the block")

  val meta_ready = io.meta_read.ready
  val data_ready = io.banked_data_read.ready
  io.req.ready := meta_ready && !set_conflict && s1_ready //&& !(s3_valid && update_meta)

  io.meta_read.valid := io.req.valid && !set_conflict && s1_ready
  val meta_read = io.meta_read.bits
  meta_read.idx := get_idx(s0_req.vaddr)
  meta_read.way_en := ~0.U(nWays.W)
  meta_read.tag := DontCare

  // generate rmask here and use it in stage 1
  // If the req comes from MissQueue, it must be a full overwrite,
  //   but we still need to read the data array,
  //   since we may do a replacement
  // If it's a store (not from MissQueue):
  //   If it's a full mask, no need to read the data array;
  //   If it's a partial mask, no need to read the fully masked words.
  // If it's an AMO (not from MissQueue), only the specific word is needed
  //   (though the mux below conservatively reads the full line).
  // If it's a probe, read it all.
  val miss_need_data = s0_req.miss
  val banked_store_need_data = !s0_req.miss && !s0_req.probe && s0_req.source === STORE_SOURCE.U && banked_store_rmask.orR
  val amo_need_data = !s0_req.miss && !s0_req.probe && s0_req.source === AMO_SOURCE.U
  val probe_need_data = s0_req.probe

  val banked_need_data = miss_need_data || banked_store_need_data || amo_need_data || probe_need_data

  val s0_banked_rmask = Mux(banked_store_need_data, banked_store_rmask,
    Mux(amo_need_data || probe_need_data || miss_need_data,
      banked_full_rmask,
      banked_none_rmask
    ))
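
  // rmask by request type, summarizing the mux above:
  //   store (partial-bank mask) -> banked_store_rmask (partially-written banks only)
  //   amo / probe / miss        -> banked_full_rmask  (read the whole line)
  //   store (full-bank mask)    -> banked_none_rmask  (nothing to merge, no read)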

  // generate wmask here and use it in stage 2
  val banked_store_wmask = bank_write
  val banked_full_wmask = ~0.U(DCacheBanks.W)
  val banked_none_wmask = 0.U(DCacheBanks.W)

  dump_pipeline_reqs("MainPipe s0", s0_valid, s0_req)

  // --------------------------------------------------------------------------------
  // stage 1
  // read data, get meta, check hit or miss
  val s1_valid = RegInit(false.B)
  val s1_need_data = RegEnable(banked_need_data, s0_fire)
  val s1_fire = s1_valid && s2_ready && (!s1_need_data || io.banked_data_read.ready)
  val s1_req = RegEnable(s0_req, s0_fire)
  val s1_set = get_idx(s1_req.vaddr)

  val s1_banked_rmask = RegEnable(s0_banked_rmask, s0_fire)
  val s1_banked_store_wmask = RegEnable(banked_store_wmask, s0_fire)

  s1_s0_set_conflict := s1_valid && get_idx(s1_req.vaddr) === get_idx(s0_req.vaddr)
  // assert(!(s1_valid && s1_req.vaddr === 0.U)) // probe vaddr may be 0

  when (s0_fire) {
    s1_valid := true.B
  }.elsewhen (s1_fire) {
    s1_valid := false.B
  }
  s1_ready := !s1_valid || s1_fire

  // tag match
  // capture the meta array response the cycle after s0 fires, then hold it
  // while the request stalls in s1
  val ecc_meta_resp = WireInit(VecInit(Seq.fill(nWays)(0.U(encMetaBits.W))))
  ecc_meta_resp := Mux(RegNext(s0_fire), io.meta_resp, RegNext(ecc_meta_resp))
  val meta_resp = ecc_meta_resp.map(m => getMeta(m).asTypeOf(new L1Metadata))

  def wayMap[T <: Data](f: Int => T) = VecInit((0 until nWays).map(f))
  val s1_tag_eq_way = wayMap((w: Int) => meta_resp(w).tag === (get_tag(s1_req.addr))).asUInt
  val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta_resp(w).coh.isValid()).asUInt
  val s1_tag_match = s1_tag_match_way.orR

  val s1_fake_meta = Wire(new L1Metadata)
  s1_fake_meta.tag := get_tag(s1_req.addr)
  s1_fake_meta.coh := ClientMetadata.onReset

  // when there is no tag match, we give it a fake meta
  // this simplifies our logic in the s2 stage
  val s1_hit_meta  = Mux(s1_tag_match, Mux1H(s1_tag_match_way, wayMap((w: Int) => meta_resp(w))), s1_fake_meta)
  val s1_hit_coh = s1_hit_meta.coh

  // replacement policy
  val replacer = ReplacementPolicy.fromString(cacheParams.replacer, nWays, nSets)
  val s1_repl_way_en = WireInit(0.U(nWays.W))
  s1_repl_way_en := Mux(RegNext(s0_fire), UIntToOH(replacer.way(s1_set)), RegNext(s1_repl_way_en))
  val s1_repl_meta = Mux1H(s1_repl_way_en, wayMap((w: Int) => meta_resp(w)))
  val s1_repl_coh = s1_repl_meta.coh

  // only true miss requests (not permission misses) need to do replacement
  // we use repl meta only when we really need to do a replacement
  val s1_need_replacement = s1_req.miss && !s1_tag_match
  val s1_way_en        = Mux(s1_need_replacement, s1_repl_way_en, s1_tag_match_way)
  val s1_meta          = Mux(s1_need_replacement, s1_repl_meta,   s1_hit_meta)
  val s1_coh           = Mux(s1_need_replacement, s1_repl_coh,  s1_hit_coh)

  // read data
  io.banked_data_read.valid := s1_fire && s1_need_data
  io.banked_data_read.bits.rmask := s1_banked_rmask
  io.banked_data_read.bits.way_en := s1_way_en
  io.banked_data_read.bits.addr := s1_req.vaddr

  // tag ecc check
  (0 until nWays).foreach(w => assert(!(s1_valid && s1_tag_match_way(w) && cacheParams.tagCode.decode(ecc_meta_resp(w)).uncorrectable)))

  dump_pipeline_reqs("MainPipe s1", s1_valid, s1_req)

  // --------------------------------------------------------------------------------
  // stage 2
  // select out data
  // to relieve timing pressure, we only do data selection in s2;
  // all other stuff (permission checking, write/amo stuff) stays in s3
  // we only change cache internal states (lr/sc counter, tag/data array) in s3
  val s2_valid = RegInit(false.B)
  val s2_fire = s2_valid && s3_ready
  val s2_req = RegEnable(s1_req, s1_fire)
  s2_ready := !s2_valid || s2_fire

  val s2_banked_store_wmask = RegEnable(s1_banked_store_wmask, s1_fire)

  s2_s0_set_conflict := s2_valid && get_idx(s2_req.vaddr) === get_idx(s0_req.vaddr)

  when (s1_fire) { s2_valid := true.B }
  .elsewhen(s2_fire) { s2_valid := false.B }

  val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_fire)
  val s2_tag_match = RegEnable(s1_tag_match, s1_fire)
  val s2_hit_meta = RegEnable(s1_hit_meta, s1_fire)
  val s2_hit_coh = RegEnable(s1_hit_coh, s1_fire)
  val s2_has_permission = s2_hit_coh.onAccess(s2_req.cmd)._1
  val s2_new_hit_coh = s2_hit_coh.onAccess(s2_req.cmd)._3

  val s2_repl_meta = RegEnable(s1_repl_meta, s1_fire)
  val s2_repl_coh = s2_repl_meta.coh
  val s2_repl_way_en = RegEnable(s1_repl_way_en, s1_fire)

  val s2_need_replacement = RegEnable(s1_need_replacement, s1_fire)
  val s2_way_en = RegEnable(s1_way_en, s1_fire)
  val s2_meta = RegEnable(s1_meta, s1_fire)
  val s2_coh = s2_meta.coh

  // as long as we have permission, we treat it as a hit,
  // even if the meta still needs to be upgraded from Trunk to Dirty
  val s2_hit = s2_tag_match && s2_has_permission
  val s2_amo_hit = s2_hit && !s2_req.miss && !s2_req.probe && s2_req.source === AMO_SOURCE.U

  when (s2_valid) {
    XSDebug("MainPipe: s2 s2_tag_match: %b s2_has_permission: %b s2_hit: %b s2_need_replacement: %b s2_way_en: %x s2_state: %d\n",
      s2_tag_match, s2_has_permission, s2_hit, s2_need_replacement, s2_way_en, s2_coh.state)
  }

  // capture the data array response the cycle after s1 fires, then hold it
  val banked_data_resp = Wire(io.banked_data_resp.cloneType)
  banked_data_resp := Mux(RegNext(s1_fire), io.banked_data_resp, RegNext(banked_data_resp))

  // generate write data
  val s2_store_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W)))

  def mergePutData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(8, wmask)
    ((~full_wmask & old_data) | (full_wmask & new_data))
  }
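
  // A worked example of mergePutData (illustrative 32-bit values; the real
  // call sites use DCacheSRAMRowBits-wide rows with a byte-granular wmask):
  //   old_data = 0x11223344, new_data = 0xAABBCCDD, wmask = b0011
  //   full_wmask = 0x0000FFFF  (each mask bit expanded to a byte)
  //   result     = 0x1122CCDD  (low two bytes from new_data, rest from old_data)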

  val s2_data = WireInit(VecInit((0 until DCacheBanks).map(i => {
    val decoded = cacheParams.dataCode.decode(banked_data_resp(i).asECCData())
    // assert(!RegNext(s2_valid && s2_hit && decoded.uncorrectable))
    // TODO: trigger ecc error
    banked_data_resp(i).raw_data
  })))

  for (i <- 0 until DCacheBanks) {
    val old_data = s2_data(i)
    val new_data = get_data_of_bank(i, s2_req.store_data)
    // for an amo hit, we should use the read-out SRAM data,
    // do not merge it with store data
    val wmask = Mux(s2_amo_hit, 0.U(wordBytes.W), get_mask_of_bank(i, s2_req.store_mask))
    s2_store_data_merged(i) := mergePutData(old_data, new_data, wmask)
  }

  // select the word the AMO works on
  val s2_data_word = s2_store_data_merged(s2_req.word_idx)

  dump_pipeline_reqs("MainPipe s2", s2_valid, s2_req)

  // --------------------------------------------------------------------------------
  // stage 3
  // do permission checking, write/amo stuff in s3
  // we only change cache internal states (lr/sc counter, tag/data array) in s3
  val s3_fire = Wire(Bool())
  val s3_req = RegEnable(s2_req, s2_fire)
  s3_ready := !s3_valid || s3_fire

  val s3_banked_store_wmask = RegEnable(s2_banked_store_wmask, s2_fire)

  val s3_data_word = RegEnable(s2_data_word, s2_fire)
  val s3_store_data_merged = RegEnable(s2_store_data_merged, s2_fire)
  val s3_data = RegEnable(s2_data, s2_fire)

  s3_s0_set_conflict := s3_valid && get_idx(s3_req.vaddr) === get_idx(s0_req.vaddr)

  when (s2_fire) { s3_valid := true.B }
  .elsewhen (s3_fire) { s3_valid := false.B }

  val s3_tag_match_way = RegEnable(s2_tag_match_way, s2_fire)
  val s3_tag_match = RegEnable(s2_tag_match, s2_fire)
  val s3_hit_meta = RegEnable(s2_hit_meta, s2_fire)
  val s3_hit_coh = RegEnable(s2_hit_coh, s2_fire)
  val s3_has_permission = s3_hit_coh.onAccess(s3_req.cmd)._1
  val s3_new_hit_coh = s3_hit_coh.onAccess(s3_req.cmd)._3

  val s3_repl_meta = RegEnable(s2_repl_meta, s2_fire)
  val s3_repl_coh = s3_repl_meta.coh
  val s3_repl_way_en = RegEnable(s2_repl_way_en, s2_fire)

  val s3_need_replacement = RegEnable(s2_need_replacement, s2_fire)
  val s3_way_en = RegEnable(s2_way_en, s2_fire)
  val s3_meta = RegEnable(s2_meta, s2_fire)
  val s3_coh = s3_meta.coh

  // --------------------------------------------------------------------------------
  // Permission checking
  def missCohGen(cmd: UInt, param: UInt, dirty: Bool) = {
    val c = categorize(cmd)
    MuxLookup(Cat(c, param, dirty), Nothing, Seq(
      //(effect param) -> (next)
      Cat(rd, toB, false.B)  -> Branch,
      Cat(rd, toB, true.B)   -> Branch,
      Cat(rd, toT, false.B)  -> Trunk,
      Cat(rd, toT, true.B)   -> Dirty,
      Cat(wi, toT, false.B)  -> Trunk,
      Cat(wi, toT, true.B)   -> Dirty,
      Cat(wr, toT, false.B)  -> Dirty,
      Cat(wr, toT, true.B)   -> Dirty))
  }
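
  // missCohGen maps (memory-op category, granted TileLink permission, grant
  // dirtiness) to the new ClientMetadata state. For example, a store miss
  // (wr) granted toT with clean data lands in Dirty, while a load miss (rd)
  // granted toB always lands in Branch; a grant can never be both toB and
  // dirty (checked by the first assertion below).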
  val miss_new_coh = ClientMetadata(missCohGen(s3_req.cmd, s3_req.miss_param, s3_req.miss_dirty))
  assert(!RegNext(s3_valid && s3_req.miss && s3_req.miss_param === toB && s3_req.miss_dirty))
  assert(!RegNext(s3_valid && s3_req.miss && !miss_new_coh.isValid()))
  assert(!RegNext(s3_valid && s3_req.miss && s3_tag_match && !(s3_hit_coh.state < miss_new_coh.state)))

  // Determine what state to go to based on Probe param
  val (probe_has_dirty_data, probe_shrink_param, probe_new_coh) = s3_coh.onProbe(s3_req.probe_param)

  // as long as we have permission,
  // we will treat it as a hit;
  // if we need to update meta from Trunk to Dirty,
  // go update it
  val s3_hit = s3_tag_match && s3_has_permission
  val s3_store_hit = s3_hit && !s3_req.miss && !s3_req.probe && s3_req.source === STORE_SOURCE.U
  val s3_amo_hit = s3_hit && !s3_req.miss && !s3_req.probe && s3_req.source === AMO_SOURCE.U

  when (s3_valid) {
    XSDebug("MainPipe: s3 s3_tag_match: %b s3_has_permission: %b s3_hit: %b s3_need_replacement: %b s3_way_en: %x s3_state: %d\n",
      s3_tag_match, s3_has_permission, s3_hit, s3_need_replacement, s3_way_en, s3_coh.state)
  }

  dump_pipeline_reqs("MainPipe s3", s3_valid, s3_req)

  // --------------------------------------------------------------------------------
  // Write to MetaArray
  // a miss always updates meta
  // a store only updates meta when it hits and needs to upgrade Trunk to Dirty
  val miss_update_meta = s3_req.miss
  val probe_update_meta = s3_req.probe && s3_tag_match && s3_coh =/= probe_new_coh
  val store_update_meta = s3_store_hit && s3_hit_coh =/= s3_new_hit_coh
  val amo_update_meta = s3_amo_hit && s3_hit_coh =/= s3_new_hit_coh
  update_meta := miss_update_meta || probe_update_meta || store_update_meta || amo_update_meta

  val new_coh = Mux(miss_update_meta, miss_new_coh,
    Mux(probe_update_meta, probe_new_coh,
    Mux(store_update_meta || amo_update_meta, s3_new_hit_coh, ClientMetadata.onReset)))
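
  // meta update priority, as encoded by the mux chain above: a refill (miss)
  // wins over a probe, which wins over a store/amo permission upgrade; the
  // ClientMetadata.onReset default is never actually written, since
  // update_meta is false when none of the three apply.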

  io.meta_write.valid := s3_fire && update_meta
  io.meta_write.bits.idx := get_idx(s3_req.vaddr)
  io.meta_write.bits.way_en := s3_way_en
  io.meta_write.bits.data.tag := get_tag(s3_req.addr)
  io.meta_write.bits.data.coh := new_coh

  // --------------------------------------------------------------------------------
  // LR, SC and AMO
  val debug_sc_fail_addr = RegInit(0.U)
  val debug_sc_fail_cnt  = RegInit(0.U(8.W))

  val lrsc_count = RegInit(0.U(log2Ceil(lrscCycles).W))
  val lrsc_valid = lrsc_count > lrscBackoff.U
  val lrsc_addr  = Reg(UInt())
  val s3_lr = !s3_req.probe && s3_req.source === AMO_SOURCE.U && s3_req.cmd === M_XLR
  val s3_sc = !s3_req.probe && s3_req.source === AMO_SOURCE.U && s3_req.cmd === M_XSC
  val s3_lrsc_addr_match = lrsc_valid && lrsc_addr === get_block_addr(s3_req.addr)
  val s3_sc_fail = s3_sc && !s3_lrsc_addr_match
  val s3_sc_resp = Mux(s3_sc_fail, 1.U, 0.U)

  val s3_can_do_amo = (s3_req.miss && !s3_req.probe && s3_req.source === AMO_SOURCE.U) || s3_amo_hit
  val s3_can_do_amo_write = s3_can_do_amo && isWrite(s3_req.cmd) && !s3_sc_fail
  when (s3_valid && (s3_lr || s3_sc)) {
    when (s3_can_do_amo && s3_lr) {
      lrsc_count := (lrscCycles - 1).U
      lrsc_addr := get_block_addr(s3_req.addr)
    } .otherwise {
      lrsc_count := 0.U
    }
  } .elsewhen (lrsc_count > 0.U) {
    lrsc_count := lrsc_count - 1.U
  }
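
  // LR/SC protocol as implemented above: an LR arms a reservation on the
  // block address for (lrscCycles - 1) cycles; the counter ticks down each
  // cycle, and the reservation only counts as live while the counter is
  // still above lrscBackoff. An SC succeeds only if a live reservation
  // matches its block address; any SC (success or failure) disarms the
  // reservation, as does releasing the reserved block (see the wb_req
  // handling below).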

  io.lrsc_locked_block.valid := lrsc_valid
  io.lrsc_locked_block.bits  := lrsc_addr

  // when we release this block,
  // we invalidate this reservation set
  when (io.wb_req.fire()) {
    when (io.wb_req.bits.addr === lrsc_addr) {
      lrsc_count := 0.U
    }
  }

  when (s3_valid) {
    when (s3_req.addr === debug_sc_fail_addr) {
      when (s3_sc_fail) {
        debug_sc_fail_cnt := debug_sc_fail_cnt + 1.U
      } .elsewhen (s3_sc) {
        debug_sc_fail_cnt := 0.U
      }
    } .otherwise {
      when (s3_sc_fail) {
        debug_sc_fail_addr := s3_req.addr
        debug_sc_fail_cnt  := 1.U
      }
    }
  }
  assert(debug_sc_fail_cnt < 100.U, "L1DCache failed too many SCs in a row")

  // --------------------------------------------------------------------------------
  // Write to DataArray
  // Miss:
  //   1. not store and not amo, data: store_data mask: store_mask (full mask)
  //   2. store, data: store_data mask: store_mask (full mask)
  //   3. amo, data: merge(store_data, amo_data, amo_mask) mask: store_mask (full mask)
  //
  // Probe: do not write data, DontCare
  // Store hit: data: merge(s3_data, store_data, store_mask) mask: store_mask
  // AMO hit: data: merge(s3_data, amo_data, amo_mask) mask: store_mask
  // so we can first generate store data and then merge with amo_data

  // generate write mask
  // which word do we need to write
  val banked_amo_wmask = UIntToOH(s3_req.word_idx)
  val banked_wmask = Mux(s3_req.miss, banked_full_wmask,
    Mux(s3_store_hit, s3_banked_store_wmask,
    Mux(s3_can_do_amo_write, banked_amo_wmask,
      banked_none_wmask)))
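
  // write-mask priority, as encoded by the mux chain above: a refill (miss)
  // overwrites every bank, a store hit writes only the banks its mask
  // touches, an AMO write touches the single bank selected by word_idx,
  // and everything else (probe, read-only AMO, failed SC) writes nothing.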
  val banked_need_write_data = VecInit(banked_wmask.orR).asUInt.orR

  // generate write data
  // AMO hits
  val amoalu   = Module(new AMOALU(wordBits))
  amoalu.io.mask := s3_req.amo_mask
  amoalu.io.cmd  := s3_req.cmd
  amoalu.io.lhs  := s3_data_word
  amoalu.io.rhs  := s3_req.amo_data

  // merge amo write data
  val s3_amo_data_merged = Wire(Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W)))
  for (i <- 0 until DCacheBanks) {
    val old_data = s3_store_data_merged(i)
    val new_data = amoalu.io.out
    val wmask = Mux(s3_can_do_amo_write && s3_req.word_idx === i.U,
      ~0.U(wordBytes.W), 0.U(wordBytes.W))
    s3_amo_data_merged(i) := mergePutData(old_data, new_data, wmask)
  }

  io.banked_data_write.valid := s3_fire && banked_need_write_data
  io.banked_data_write.bits.way_en := s3_way_en
  io.banked_data_write.bits.addr := s3_req.vaddr
  io.banked_data_write.bits.wmask := banked_wmask
  io.banked_data_write.bits.data := s3_amo_data_merged.asUInt.asTypeOf(io.banked_data_write.bits.data.cloneType)

  // --------------------------------------------------------------------------------
  // Writeback
  // whether we need to write back a block
  // TODO: add support for ProbePerm
  // for now, we only deal with ProbeBlock
  val miss_writeback = s3_need_replacement && s3_coh.state =/= ClientStates.Nothing
  val probe_writeback = s3_req.probe
  val need_writeback  = miss_writeback || probe_writeback

  val (_, miss_shrink_param, _) = s3_coh.onCacheControl(M_FLUSH)
  val writeback_param = Mux(miss_writeback, miss_shrink_param, probe_shrink_param)

  val writeback_data = s3_tag_match && s3_req.probe && s3_req.probe_need_data ||
    s3_coh === ClientStates.Dirty || miss_writeback && s3_coh.state =/= ClientStates.Nothing

  val writeback_paddr = Cat(s3_meta.tag, get_untag(s3_req.vaddr))

  val wb_req = io.wb_req.bits
  io.wb_req.valid := s3_fire && need_writeback
  wb_req.addr := get_block_addr(writeback_paddr)
  wb_req.param := writeback_param
  wb_req.voluntary := miss_writeback
  wb_req.hasData := writeback_data
  wb_req.data := s3_data.asUInt
  wb_req.dirty := s3_coh === ClientStates.Dirty
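
  // wb_req.voluntary distinguishes the two writeback flavors: a voluntary
  // writeback is triggered by replacement on a miss (a TileLink Release),
  // while an involuntary one answers a probe (a ProbeAck[Data]); the shrink
  // param comes from onCacheControl(M_FLUSH) or onProbe respectively.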

  // since writes have higher priority than reads, meta/data array ready is not needed
  s3_fire := s3_valid && (!need_writeback || io.wb_req.ready)/* &&
                         (!update_meta || io.meta_write.ready) &&
                         (!need_write_data || io.debug_data_write.ready)*/

  // Technically, load fast wakeup should be disabled when banked_data_write.valid is true,
  // but for timing purposes, we loosen the condition to s3_valid, ignoring whether wb is ready or not.
  for (i <- 0 until (LoadPipelineWidth - 1)) {
    io.disable_ld_fast_wakeup(i) := banked_need_write_data && s3_valid
  }
  io.disable_ld_fast_wakeup(LoadPipelineWidth - 1) := banked_need_write_data && s3_valid || s1_need_data && s1_valid

  // --------------------------------------------------------------------------------
  // update replacement policy
  val access_bundle = Wire(ValidIO(new ReplacementAccessBundle))
  access_bundle.valid := RegNext(s3_fire && (update_meta || banked_need_write_data))
  access_bundle.bits.set := RegNext(get_idx(s3_req.vaddr))
  access_bundle.bits.way := RegNext(OHToUInt(s3_way_en))
  val access_bundles = io.replace_access.toSeq ++ Seq(access_bundle)
  val sets = access_bundles.map(_.bits.set)
  val touch_ways = Seq.fill(LoadPipelineWidth + 1)(Wire(ValidIO(UInt(log2Up(nWays).W))))
  (touch_ways zip access_bundles).map{ case (w, access) =>
    w.valid := access.valid
    w.bits := access.bits.way
  }
  replacer.access(sets, touch_ways)

  // --------------------------------------------------------------------------------
  // send store/amo miss to miss queue
  val store_amo_miss = !s3_req.miss && !s3_req.probe && !s3_hit && (s3_req.source === STORE_SOURCE.U || s3_req.source === AMO_SOURCE.U)
  io.miss_req.valid := s3_fire && store_amo_miss
  io.miss_req.bits.source := s3_req.source
  io.miss_req.bits.cmd := s3_req.cmd
  io.miss_req.bits.addr := s3_req.addr
  io.miss_req.bits.vaddr := s3_req.vaddr
  io.miss_req.bits.store_data := s3_req.store_data
  io.miss_req.bits.store_mask := s3_req.store_mask
  io.miss_req.bits.word_idx := s3_req.word_idx
  io.miss_req.bits.amo_data := s3_req.amo_data
  io.miss_req.bits.amo_mask := s3_req.amo_mask
  io.miss_req.bits.coh := s3_coh
  io.miss_req.bits.id := s3_req.id

  // --------------------------------------------------------------------------------
  // send response
  val resp = Wire(new MainPipeResp)
  resp.data := DontCare
  resp.id := s3_req.id
  resp.miss := store_amo_miss
  resp.replay := io.miss_req.valid && !io.miss_req.ready

  io.miss_resp.valid := s3_fire && s3_req.miss
  io.miss_resp.bits := resp
  io.miss_resp.bits.id := s3_req.miss_id

  io.store_resp.valid := s3_fire && s3_req.source === STORE_SOURCE.U
  io.store_resp.bits := resp

  io.amo_resp.valid := s3_fire && s3_req.source === AMO_SOURCE.U
  io.amo_resp.bits := resp
  io.amo_resp.bits.data := Mux(s3_sc, s3_sc_resp, s3_data_word)
  // reuse the id field to pass lr/sc valid to commit;
  // NEMU uses this to see whether the lr/sc counter is still valid
  io.amo_resp.bits.id   := lrsc_valid

  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.miss_req.fire()) {
    io.miss_req.bits.dump()
  }

  when (io.miss_resp.fire()) {
    io.miss_resp.bits.dump()
  }

  when (io.store_resp.fire()) {
    io.store_resp.bits.dump()
  }

  when (io.amo_resp.fire()) {
    io.amo_resp.bits.dump()
  }

  when (io.wb_req.fire()) {
    io.wb_req.bits.dump()
  }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }

  // --------------------------------------------------------------------------------
  // Debug logging functions
  def dump_pipeline_reqs(pipeline_stage_name: String, valid: Bool, req: MainPipeReq) = {
    when (valid) {
      XSDebug(s"$pipeline_stage_name ")
      req.dump()
    }
  }

  // performance counters
  // average penalty for each req in the pipeline = pipe_total_penalty / pipe_req
  XSPerfAccumulate("pipe_req", s0_fire)
  XSPerfAccumulate("pipe_total_penalty", PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid))))

  XSPerfAccumulate("pipe_blocked_by_wbu", s3_valid && need_writeback && !io.wb_req.ready)
  XSPerfAccumulate("pipe_blocked_by_nack_data", s1_valid && s1_need_data && !io.banked_data_read.ready)
  XSPerfAccumulate("pipe_reject_req_for_nack_meta", s0_valid && !meta_ready)
  XSPerfAccumulate("pipe_reject_req_for_set_conflict", s0_valid && set_conflict)

  for (i <- 0 until LoadPipelineWidth) {
    for (w <- 0 until nWays) {
      XSPerfAccumulate("load_pipe_" + Integer.toString(i, 10) + "_access_way_" + Integer.toString(w, 10),
        io.replace_access(i).valid && io.replace_access(i).bits.way === w.U)
    }
  }

  for (w <- 0 until nWays) {
    XSPerfAccumulate("main_pipe_access_way_" + Integer.toString(w, 10),
      access_bundle.valid && access_bundle.bits.way === w.U)
    XSPerfAccumulate("main_pipe_choose_way_" + Integer.toString(w, 10),
      RegNext(s0_fire) && s1_repl_way_en === UIntToOH(w.U))
  }

}