xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 42b75a597e916f6a6887cb8bc626483d0d2645dd)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16*
17*
18* Acknowledgement
19*
20* This implementation is inspired by several key papers:
21* [1] Gurindar S. Sohi, and Manoj Franklin. "[High-bandwidth data memory systems for superscalar processors.]
22* (https://doi.org/10.1145/106972.106980)" 4th International Conference on Architectural Support for Programming
23* Languages and Operating Systems (ASPLOS). 1991.
24***************************************************************************************/
25
26package xiangshan.cache
27
28import org.chipsalliance.cde.config.Parameters
29import chisel3._
30import utils._
31import utility._
32import utility.sram.SRAMTemplate
33import chisel3.util._
34import utility.mbist.MbistPipeline
35import xiangshan.mem.LqPtr
36import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
37
38import scala.math.max
39
/**
  * Record layout of one bank-conflict sample written to ChiselDB.
  *
  * Field order is kept as declared because it defines the bundle layout.
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // request address of every load pipeline
  val addr = Vec(LoadPipelineWidth, UInt(PAddrBits.W))
  // set index of every load pipeline
  val set_index = Vec(
    LoadPipelineWidth,
    UInt((DCacheAboveIndexOffset - DCacheSetOffset).W)
  )
  // bank index of each SRAM row covered by one VLEN-wide access
  val bank_index = Vec(
    VLEN / DCacheSRAMRowBits,
    UInt((DCacheSetOffset - DCacheBankOffset).W)
  )
  // binary-encoded way
  val way_index = UInt(wayBits.W)
  // set when the reported conflict is between requests with equal set and div address
  val fake_rr_bank_conflict = Bool()
}
47
/** Basic data-array request: a way-enable vector plus a physical address. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // one bit per way
  val way_en = UInt(DCacheWays.W)
  // physical address of the access
  val addr = UInt(PAddrBits.W)
}
53
/**
  * Load-pipeline read request: way enable and address, a duplicated copy of
  * the address, a per-bank mask, a kill flag and the load-queue pointer.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // one bit per way
  val way_en = UInt(DCacheWays.W)
  // physical address of the access
  val addr = UInt(PAddrBits.W)
  // second copy of the address
  val addr_dup = UInt(PAddrBits.W)
  // one bit per bank
  val bankMask = UInt(DCacheBanks.W)
  val kill = Bool()
  // load-queue pointer of the requesting load
  val lqIdx = new LqPtr
}
63
/** Whole-line read request: the basic request extended with a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per bank
  val rmask = UInt(DCacheBanks.W)
}
68
/**
  * Write request covering a whole cache block in a single cycle:
  * one data row per bank, qualified by a per-bank write mask.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per bank
  val wmask = UInt(DCacheBanks.W)
  // one SRAM row of write data per bank
  val data = Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W))
}
75
// Cache-block write request without data: it adds no fields of its own, so it
// carries only what L1BankedDataReadReq declares (way_en and addr).
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
78
/** Result of reading one SRAM row: check bits, raw data and a delayed error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // check bits stored alongside the row
  val ecc = UInt(dataECCBits.W)
  // data bits of the row
  val raw_data = UInt(DCacheSRAMRowBits.W)
  // 1 cycle later than data resp
  val error_delayed = Bool()

  /** Encoded word with the check bits placed above the data bits. */
  def asECCData(): UInt = Cat(ecc, raw_data)
}
90
/** Write command for a [[DataSRAMBank]]: enable, set address, way select and encoded data. */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  val en = Bool()
  // width is left to be inferred from the driver
  val addr = UInt()
  // one bit per way
  val way_en = UInt(DCacheWays.W)
  // encoded word (encDataBits wide)
  val data = UInt(encDataBits.W)
}
97
/**
  * Thin wrapper around one single-ported SRAM holding the encoded rows of a
  * single (bank, way) pair.
  *
  * @param bankIdx bank number, only used in debug printouts
  * @param wayIdx  way number, only used in debug printouts
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    // write port
    val w = new Bundle {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(encDataBits.W))
    }

    // read port
    val r = new Bundle {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(encDataBits.W))
    }
  })

  // backing storage: one encoded word per set of this set-division
  val data_sram = Module(
    new SRAMTemplate(
      UInt(encDataBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true,
      hasMbist = hasMbist
    )
  )

  // write side
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(setIdx = io.w.addr, data = io.w.data, waymask = 1.U)

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Debug print of a read, issued the cycle after the read enable. */
  def dump_r() = {
    XSDebug(
      RegNext(io.r.en),
      "bank read set %x bank %x way %x data %x\n",
      RegEnable(io.r.addr, io.r.en), bankIdx.U, wayIdx.U, io.r.data
    )
  }

  /** Debug print of a write, issued in the cycle of the write enable. */
  def dump_w() = {
    XSDebug(
      io.w.en,
      "bank write set %x bank %x way %x data %x\n",
      io.w.addr, bankIdx.U, wayIdx.U, io.w.data
    )
  }

  /** Emits both debug printouts, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
161
/**
  * One data bank: DCacheWays single-ported SRAMs that share a read address and
  * a write address. A read returns the row of every way; a write is steered to
  * the way selected by `way_en`.
  *
  * @param index bank number (not referenced inside the module body)
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(encDataBits.W)))
    }
  })

  // a write may select at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank: one SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(
      new SRAMTemplate(
        UInt(encDataBits.W),
        set = DCacheSets / DCacheSetDiv,
        way = 1,
        shouldReset = false,
        holdRead = false,
        singlePort = true,
        withClockGate = true,
        hasMbist = hasMbist
      )
    )
  }

  data_bank.zipWithIndex.foreach { case (sram, w) =>
    // only the selected way is written
    val wen = w_info.en && w_info.way_en(w)
    sram.io.w.req.valid := wen
    sram.io.w.req.bits.apply(setIdx = w_info.addr, data = w_info.data, waymask = 1.U)
    // every way is read together
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Debug print of a read, issued the cycle after the read enable. */
  def dump_r() = {
    XSDebug(
      RegNext(io.r.en),
      "bank read addr %x data %x\n",
      RegEnable(io.r.addr, io.r.en), io.r.data.asUInt
    )
  }

  /** Debug print of a write, issued in the cycle of the write enable. */
  def dump_w() = {
    XSDebug(
      io.w.en,
      "bank write addr %x way_en %x data %x\n",
      io.w.addr, io.w.way_en, io.w.data
    )
  }

  /** Emits both debug printouts, write first. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
231
// Marker object: AbstractBankedDataArray.DataEccParam is Some(HasDataEccParam)
// when EnableDataEcc is set and None otherwise.
case object HasDataEccParam
233
234//                     Banked DCache Data
235// -----------------------------------------------------------------
236// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
237// -----------------------------------------------------------------
238// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
239// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
240// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
241// -----------------------------------------------------------------
/**
  * Base class of the banked DCache data arrays.
  *
  * It declares the IO shared by the concrete implementations (load-pipeline
  * word reads, main-pipeline line read / write, conflict and error reporting)
  * and a set of helpers: splitting / joining encoded words, debug dumps and a
  * port selector based on the load-queue pointer.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule
{
  val DataEccParam = if(EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    val pseudo_error = Flipped(DecoupledIO(Vec(DCacheBanks, new CtrlUnitSignalingBundle)))
  })

  // bank (0, 1, 2, 3) each way use duplicate addr
  def DuplicatedQueryBankSeq = Seq(0, 1, 2, 3)

  /** Builds a Vec with one element per load pipeline by applying `f` to the pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /**
    * Extracts the check bits (the bits above the data row) from an encoded word.
    * Returns a constant zero when data ECC is disabled.
    *
    * @param encWord encoded word; must be exactly `encDataBits` wide when ECC is enabled
    */
  def getECCFromEncWord(encWord: UInt) = {
    if (EnableDataEcc) {
      // report the width that was actually passed in, not encDataBits twice
      require(encWord.getWidth == encDataBits,
        s"encWord width=${encWord.getWidth} != encDataBits=$encDataBits!")
      encWord(encDataBits-1, DCacheSRAMRowBits)
    } else {
      0.U
    }
  }

  /** Extracts the data row (the low `DCacheSRAMRowBits` bits) from an encoded word. */
  def getDataFromEncWord(encWord: UInt) = {
    encWord(DCacheSRAMRowBits-1, 0)
  }

  /** Joins check bits and data into an encoded word; just the data when ECC is disabled. */
  def asECCData(ecc: UInt, data: UInt) = {
    if (EnableDataEcc) {
      Cat(ecc, data)
    } else {
      data
    }
  }

  /** Debug print of the valid load-pipeline read requests and of the line read request. */
  def dumpRead = {
    (0 until LoadPipelineWidth).foreach { w =>
      XSDebug(io.read(w).valid,
        s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
        io.read(w).bits.way_en, io.read(w).bits.addr)
    }
    XSDebug(io.readline.valid,
      s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
      io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
  }

  /** Debug print of a valid write request, followed by one line per bank. */
  def dumpWrite = {
    XSDebug(io.write.valid,
      s"DataArray Write valid way_en: %x addr: %x\n",
      io.write.bits.way_en, io.write.bits.addr)

    (0 until DCacheBanks).foreach { r =>
      XSDebug(io.write.valid,
        s"cycle: $r data: %x wmask: %x\n",
        io.write.bits.data(r), io.write.bits.wmask(r))
    }
  }

  /** Debug print of the read response of every load pipeline (two rows for a 128-bit request). */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    (0 until LoadPipelineWidth).foreach { r =>
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(io.read_resp(r)(1).raw_data,io.read_resp(r)(0).raw_data),
        io.read_resp(r)(0).raw_data))
    }
  }

  /** Emits all debug printouts: requests, write, responses. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }

  /**
    * Reduces a list of candidate ports to a single one.
    *
    * Between two candidates, a valid one wins over an invalid one; when both
    * are valid the one whose pointer does NOT compare greater (`>`) is kept,
    * and `a` is kept on a tie. When neither is valid, `b` is forwarded and the
    * resulting valid flag is low.
    * NOTE(review): presumably this picks the oldest load, as the method name
    * says; that depends on the ordering defined by LqPtr's `>`.
    *
    * @param valid per-port valid flag
    * @param bits  per-port load-queue pointer
    * @param index per-port index value returned for the selected port
    * @return ((any valid, pointer of selected port), index of selected port)
    */
  def selcetOldestPort(valid: Seq[Bool], bits: Seq[LqPtr], index: Seq[UInt]):((Bool, LqPtr), UInt) = {
    require(valid.length == bits.length &&  bits.length == index.length, s"length must eq, valid:${valid.length}, bits:${bits.length}, index:${index.length}")
    ParallelOperation(valid zip bits zip index,
      (a: ((Bool, LqPtr), UInt), b: ((Bool, LqPtr), UInt)) => {
        val ((aValid, aPtr), aIdx) = a
        val ((bValid, bPtr), bIdx) = b
        // b is taken when a is invalid, or when both are valid and aPtr > bPtr;
        // the same select drives both the pointer and the index mux
        val selB = !aValid || (bValid && aPtr > bPtr)
        val selPtr = Mux(selB, bPtr, aPtr)
        val selIdx = Mux(selB, bIdx, aIdx)
        ((aValid || bValid, selPtr), selIdx)
      }
    )
  }

}
357
358// the smallest access unit is sram
// Data array built from one DataSRAM per (set division, bank, way).
// Summary of what the code below does:
//  - load pipelines and the readline port share the SRAM read ports; conflicts
//    between them are detected and reported, not stalled, except that `ready`
//    is deasserted on a conflict with the registered write
//  - a write request is registered first and applied to the SRAMs in the
//    following cycle
//  - read data is returned the cycle after the request, the ECC error flag one
//    cycle after the data
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  // elaboration-time switch: when true, a readline only conflicts with / enables
  // the banks selected by its rmask
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // data_banks(div)(bank)(way); one MBIST pipeline is placed per set division
  val data_banks = List.tabulate(DCacheSetDiv)( k => {
    val banks = List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j))))
    val mbistPl = MbistPipeline.PlaceMbistPipeline(1, s"MbistPipeDataSet$k", hasMbist)
    banks
  })
  // instantiate the debug printouts of every SRAM wrapper
  data_banks.map(_.map(_.map(_.dump())))

  // per load pipeline: way enable, set / div address and the two bank addresses
  // (index 0: addressed bank, index 1: the following bank)
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  // NOTE(review): line_way_en_reg is not referenced anywhere else in this class
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request registered for one cycle; the SRAM write below uses these registers.
  // Mask and data come from io.write, while valid / way / addresses come from the
  // per-bank io.write_dup copies.
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second bank is always bank + 1; its users qualify it with is128Req where needed
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // rr_bank_conflict(x)(y): load ports x and y are both valid, hit the same set
  // division, overlap in bankMask, use the same way_en, but address different sets
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
    if (x == y) {
      false.B
    } else {
      io.read(x).valid && io.read(y).valid &&
        div_addrs(x) === div_addrs(y) &&
        (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
        io.read(x).bits.way_en === io.read(y).bits.way_en &&
        set_addrs(x) =/= set_addrs(y)
    }
  }))
  // port i conflicts with at least one other port
  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
  // NOTE(review): load_req_valid is not referenced anywhere else in this class
  val load_req_valid = io.read.map(_.valid)
  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)


  // among the conflicting ports, select one by load-queue pointer (see selcetOldestPort)
  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools

  // true for every conflicting port that was NOT selected above
  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
  )

  // load vs. readline conflict: same set division but different set
  // (and overlapping banks when ReduceReadlineConflict is set);
  // the *_intend variant is qualified by readline_intend instead of readline.valid
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // load vs. registered write conflict: same set division, same way, and the write
  // mask covers the first bank (or the second bank for a 128-bit request)
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // readline vs. registered write conflict: same set division
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
    rr_bank_conflict_oldest(i)
    // reported one cycle after the request
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    // fast wakeup is disabled on a write conflict, an intended readline conflict,
    // or a read-read conflict with any lower-numbered port
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // per-SRAM read results, indexed (div)(bank)(way)
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  // NOTE(review): read_result_delayed is driven below but not read anywhere else in this class
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)

  // pseudo error injection: per-bank XOR mask applied to the raw read data,
  // zero unless the pseudo_error request and the bank entry are both valid
  val pseudo_data_toggle_mask = io.pseudo_error.bits.map {
    case bank =>
      Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
  }
  // a line read that fires and covers at least one bank with a valid pseudo-error entry
  val readline_hit = io.readline.fire &&
                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
  // a load read that fires on a bank with a valid pseudo-error entry
  // (second bank only counts for 128-bit requests) and is not flagged by bank_conflict_slow
  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
                          case ((read, (bank_addr, is128Req)), i) =>
                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
                      }.reduce(_|_)
  // the pseudo-error request is accepted the cycle after such a hit
  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)

  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        // load pipe i reads this SRAM when it is valid, targets this div / bank / way
        // and did not lose the read-read arbitration
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
          way_en(i)(way_index) &&
          !rr_bank_conflict_oldest(i)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // readline has priority over the load pipes; among load pipes the lowest index wins
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr

        // split the encoded word; the pseudo-error mask flips bits of the data part only
        read_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data)
        read_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data) ^ pseudo_data_toggle_mask(bank_index)

        if (EnableDataEcc) {
          // the encoded word is captured the cycle after read_en and decoded from that register
          val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
          read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
          read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
        } else {
          read_result(div_index)(bank_index)(way_index).error_delayed := false.B
          read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }

        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))
      }
    }
  }

  // flattened copy of every SRAM read enable, used only for the perf counter below
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*: request info one cycle after the request; rr_*: two cycles after
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    // NOTE(review): r_way_addr uses a plain RegNext while r_div_addr / r_bank_addr are
    // enabled by fire - confirm this difference is intended
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      // data response: select the SRAM addressed by the registered request
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      // reported one cycle after the data and suppressed when the request had a bank conflict
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  // r_*: line request info one cycle after the request; rr_*: two cycles after
  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(readline_r_div_addr)(i)(readline_r_way_addr)
    readline_error_delayed(i) := read_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
  })
  // any bank of the line reporting an error, two cycles after the line request fired
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // write this SRAM when the registered request selects this bank, div and way
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        // NOTE(review): this register depends only on bank_index but is declared once per
        // (div, way) as well - confirm whether the duplication is intentional
        val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg
        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
      }
    }
  }

  // ChiselDB logging of read-read bank conflicts, one table per hart
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  // only the conflict between load ports 0 and 1 is recorded
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    // NOTE(review): rr_bank_conflict already requires set_addrs(x) =/= set_addrs(y), so this
    // expression looks like it can never be true here - verify
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is additionally gated by a Constantin record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

  // NOTE(review): read_result and read_error_delayed_result are already marked dontTouch
  // unconditionally above
  if (backendParams.debugEn){
    load_req_with_bank_conflict.map(dontTouch(_))
    dontTouch(read_result)
    dontTouch(read_error_delayed_result)
  }
}
650
651// the smallest access unit is bank
652class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
653  println("  DCacheType: BankedDataArray")
654  val ReduceReadlineConflict = false
655
656  io.write.ready := true.B
657  io.write_dup.foreach(_.ready := true.B)
658
659  val data_banks = List.tabulate(DCacheSetDiv) { k =>
660    val banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
661    val mbistPl = MbistPipeline.PlaceMbistPipeline(1, s"MbistPipeDcacheDataSet$k", hasMbist)
662    banks
663  }
664  data_banks.map(_.map(_.dump()))
665
666  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
667  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
668  val set_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
669  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
670  val div_addrs_dup = Wire(Vec(LoadPipelineWidth, UInt()))
671  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
672  val bank_addrs_dup = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
673  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
674  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))
675  val set_addrs_dup_reg = Wire(Vec(LoadPipelineWidth, UInt()))
676
677  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
678  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
679  val line_way_en = io.readline.bits.way_en
680
681  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
682  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
683  val write_valid_reg = RegNext(io.write.valid)
684  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
685  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
686  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
687  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))
688
689  // read data_banks and ecc_banks
690  // for single port SRAM, do not allow read and write in the same cycle
691  val rwhazard = RegNext(io.write.valid)
692  val rrhazard = false.B // io.readline.valid
693  (0 until LoadPipelineWidth).map(rport_index => {
694    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
695    div_addrs_dup(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr_dup)
696    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
697    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
698    bank_addrs_dup(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr_dup)
699    bank_addrs_dup(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs_dup(rport_index)(0) + 1.U, bank_addrs_dup(rport_index)(0))
700    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
701    set_addrs_dup(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup)
702    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)
703    set_addrs_dup_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr_dup), io.read(rport_index).valid)
704
705    // use way_en to select a way after data read out
706    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
707    way_en(rport_index) := io.read(rport_index).bits.way_en
708    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
709  })
710
711  // read each bank, get bank result
     // Read-read conflict matrix between load ports.
     // rr_bank_conflict(x)(y) is true when ports x and y are both valid, target the same div,
     // have at least one common bit in bankMask, and carry different set addresses.
     // The diagonal (x == y) is constant false.
712  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y => {
713    if (x == y) {
714      false.B
715    } else {
716      io.read(x).valid && io.read(y).valid &&
717      div_addrs(x) === div_addrs(y) &&
718      (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
719      set_addrs(x) =/= set_addrs(y)
720    }
721  }
722  ))
723
     // Per port: this port conflicts with at least one other port.
724  val load_req_with_bank_conflict = rr_bank_conflict.map(_.reduce(_ || _))
     // NOTE(review): load_req_valid is not referenced again in this section.
725  val load_req_valid = io.read.map(_.valid)
726  val load_req_lqIdx = io.read.map(_.bits.lqIdx)
727  val load_req_index = (0 until LoadPipelineWidth).map(_.asUInt)
728
     // selcetOldestPort is defined outside this section; presumably it picks, among the ports
     // flagged in load_req_with_bank_conflict, the one with the oldest lqIdx -- TODO confirm.
     // Only the second element of its result is used here, decoded as a one-hot port select.
729  val load_req_bank_conflict_selcet = selcetOldestPort(load_req_with_bank_conflict, load_req_lqIdx, load_req_index)
730  val load_req_bank_select_port  = UIntToOH(load_req_bank_conflict_selcet._2).asBools
731
     // Despite the name, entry i is set for a conflicting port that was NOT the selected one,
     // i.e. a port that has a conflict and does not win the selection above.
732  val rr_bank_conflict_oldest = (0 until LoadPipelineWidth).map(i =>
733    !load_req_bank_select_port(i) && load_req_with_bank_conflict(i)
734  )
735
736  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
737  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
     // Load-port vs. readline conflict.
     // With ReduceReadlineConflict the port must also overlap the readline's rmask in its
     // bankMask; without it, targeting the same div as the readline is enough.
     // rrl_bank_conflict is qualified by io.readline.valid, the *_intend variant by io.readline_intend.
738  (0 until LoadPipelineWidth).foreach { i =>
739    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
740                else io.read(i).valid && div_addrs(i)===line_div_addr
741    rrl_bank_conflict(i) := judge && io.readline.valid
742    rrl_bank_conflict_intend(i) := judge && io.readline_intend
743  }
     // Load-port vs. registered write conflict: the port is valid, write_valid_reg is set,
     // the port's div equals write_div_addr_dup_reg.head, and write_bank_mask_reg is set for
     // the port's first bank, or for its second bank when the request is 128-bit.
744  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
745    io.read(x).valid &&
746    write_valid_reg &&
747    div_addrs(x) === write_div_addr_dup_reg.head &&
748    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
749  )
     // Readline vs. registered write conflict: same div, regardless of bank mask.
750  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
751  // ready
     // readline is back-pressured only by a write conflict; each load port by its own
     // write conflict (rrhazard is constant false where it is defined above).
752  io.readline.ready := !(wrl_bank_conflict)
753  io.read.zipWithIndex.map{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}
754
755  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
     // perf_multi_read: two or more load ports present a valid read in the same cycle.
     // Per load port: produce the registered (slow) conflict flag and the same-cycle
     // fast-wakeup disable.
756  (0 until LoadPipelineWidth).foreach(i => {
757    // remove fake rr_bank_conflict situation in s2
       // Both terms are delayed by one cycle with RegNext before being reported:
       // write or readline conflict, plus losing the read-read selection.
758    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
759    val real_rr_bank_conflict_reg = RegNext(rr_bank_conflict_oldest(i))
760    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg
761
762    // get result in s1
       // Combinational: write conflict, intended-readline conflict, or a read-read conflict
       // with any lower-indexed port (port 0 has no lower-indexed port, hence the constant 0).
763    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
764      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
765  })
766  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
     // One read-read conflict counter per unordered port pair (x < y).
767  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
768    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
769  ))
     // Per-port counters: readline conflicts, write conflicts, and valid reads.
770  (0 until LoadPipelineWidth).foreach(i => {
771    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
772    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
773    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
774  })
775  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
776  XSPerfAccumulate("data_array_read_line", io.readline.valid)
777  XSPerfAccumulate("data_array_write", io.write.valid)
778
779  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
     // Read results indexed as (div)(bank)(way); all three wires are driven by the
     // per-bank loop that follows.
780  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
781  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
782
     // Pseudo error injection: per bank, the toggle mask is the configured mask while both
     // io.pseudo_error.valid and that bank's valid are set, otherwise 0. The mask is XORed
     // into raw_data where the bank output is unpacked.
783  val pseudo_data_toggle_mask = io.pseudo_error.bits.map {
784    case bank =>
785      Mux(io.pseudo_error.valid && bank.valid, bank.mask, 0.U)
786  }
     // A firing readline whose rmask covers at least one bank with a valid pseudo error.
787  val readline_hit = io.readline.fire &&
788                     (io.readline.bits.rmask & VecInit(io.pseudo_error.bits.map(_.valid)).asUInt).orR
     // Any load port that fires on a bank with a valid pseudo error (second bank only for
     // 128-bit requests) and is not flagged in io.bank_conflict_slow.
     // NOTE(review): io.bank_conflict_slow is built from RegNext values (previous cycle) while
     // read.fire is the current cycle -- confirm the intended stage alignment.
789  val readbank_hit = io.read.zip(bank_addrs.zip(io.is128Req)).zipWithIndex.map {
790                          case ((read, (bank_addr, is128Req)), i) =>
791                            val error_bank0 = io.pseudo_error.bits(bank_addr(0))
792                            val error_bank1 = io.pseudo_error.bits(bank_addr(1))
793                            read.fire && (error_bank0.valid || error_bank1.valid && is128Req) && !io.bank_conflict_slow(i)
794                      }.reduce(_|_)
     // The pseudo error request is acknowledged one cycle after either kind of hit.
795  io.pseudo_error.ready := RegNext(readline_hit || readbank_hit)
796
797  for (div_index <- 0 until DCacheSetDiv) {
       // For every (div, bank) SRAM: decide who reads it this cycle, drive its read port,
       // and unpack the per-way output into bank_result.
798    for (bank_index <- 0 until DCacheBanks) {
799      //     Set Addr & Read Way Mask
800      //
801      //    Pipe 0   ....  Pipe (n-1)
802      //      +      ....     +
803      //      |      ....     |
804      // +----+---------------+-----+
805      //  X                        X
806      //   X                      +------+ Bank Addr Match
807      //    +---------+----------+
808      //              |
809      //     +--------+--------+
810      //     |    Data Bank    |
811      //     +-----------------+
         // Port i targets this bank when it is valid, addresses this div, one of its bank slots
         // equals bank_index (slot 1 counts only for 128-bit requests), and it did not lose
         // the read-read conflict selection.
812      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
813        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) &&
814          !rr_bank_conflict_oldest(i)
815      })))
         // Same match computed from the *_dup address copies.
816      val bank_addr_matchs_dup = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
817        io.read(i).valid && div_addrs_dup(i) === div_index.U && (bank_addrs_dup(i)(0) === bank_index.U || bank_addrs_dup(i)(1) === bank_index.U && io.is128Req(i)) &&
818          !rr_bank_conflict_oldest(i)
819      })))
         // Readline targets this bank: same div, and (with ReduceReadlineConflict) the bank's
         // bit is set in rmask.
820      val readline_match = Wire(Bool())
821      if (ReduceReadlineConflict) {
822        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
823      } else {
824        readline_match := io.readline.valid && line_div_addr === div_index.U
825      }
826
         // Set address presented to the SRAM: readline takes precedence; otherwise the
         // lowest-indexed matching load port supplies it (PriorityMux order).
827      val bank_set_addr = Mux(readline_match,
828        line_set_addr,
829        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
830      )
831      val bank_set_addr_dup = Mux(readline_match,
832        line_set_addr,
833        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs_dup(i) -> set_addrs_dup(i)))
834      )
         // The read enable is derived from the non-dup match vector only.
835      val read_enable = bank_addr_matchs.asUInt.orR || readline_match
836
837      // read raw data
838      val data_bank = data_banks(div_index)(bank_index)
839      data_bank.io.r.en := read_enable
840
         // Banks listed in DuplicatedQueryBankSeq take their read address from the dup copy.
841      if (DuplicatedQueryBankSeq.contains(bank_index)) {
842        data_bank.io.r.addr := bank_set_addr_dup
843      } else {
844        data_bank.io.r.addr := bank_set_addr
845      }
846      for (way_index <- 0 until DCacheWays) {
           // Split the encoded word into ECC bits and data bits; the data bits are XORed with
           // this bank's pseudo-error toggle mask.
847        bank_result(div_index)(bank_index)(way_index).ecc := getECCFromEncWord(data_bank.io.r.data(way_index))
848        bank_result(div_index)(bank_index)(way_index).raw_data := getDataFromEncWord(data_bank.io.r.data(way_index)) ^ pseudo_data_toggle_mask(bank_index)
849
850        if (EnableDataEcc) {
             // The encoded word is captured when RegNext(read_enable) is set, i.e. the cycle
             // after the read was issued, and the ECC decode runs on the captured value.
851          val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
852          val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
853          bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
854          read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
855        } else {
             // ECC disabled: no error is ever reported.
856          bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
857          read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
858        }
           // Registered copy of the whole result, captured with the same enable as the ECC word.
859        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
860      }
861    }
862  }
863
864  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
     // Perf only: despite the "_oh" suffix these are counts, not one-hot values.
     // Each bank contributes DCacheWays when its read enable is set (the enable bit is
     // replicated DCacheWays times and pop-counted), otherwise 0; banks are summed per div
     // and the per-div sums are added up for the counter.
865  for (div_index <- 0 until DCacheSetDiv){
866    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
867    for (bank_index <- 0 until DCacheBanks) {
868      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
869    }
870    data_read_oh(div_index) := temp.reduce(_ + _)
871  }
872  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))
873
874  (0 until LoadPipelineWidth).map(i => {
       // Per load port: route the selected (div, bank, way) entry of bank_result to the
       // response, and report the delayed ECC error one stage later.
875    // 1 cycle after read fire(load s2)
876    val r_read_fire = RegNext(io.read(i).fire)
877    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
878    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
879    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
880    // 2 cycles after read fire(load s3)
       // The rr_* registers re-create the first-stage register inline instead of reusing the
       // r_* values above; the inner expressions are identical to those r_* definitions.
881    val rr_read_fire = RegNext(r_read_fire)
882    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
883    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
884    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
       // One response slot per bank slot of the request (VLEN / DCacheSRAMRowBits slots).
885    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
886      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
887      // error detection
         // Reported only for a request that fired two cycles earlier and whose
         // bank_conflict_slow flag (delayed by one more cycle here) was not set.
888      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
889    })
890  })
891
892  // read result: expose banked read result
     // Readline response: every bank of the selected div and way is returned.
     // The way/div registers are enabled by io.readline.valid (first stage) and by
     // RegNext(io.readline.valid) (second stage), while the final error is qualified by
     // io.readline.fire delayed by two cycles.
893  val readline_error_delayed = Wire(Vec(DCacheBanks, Bool()))
894  val readline_r_way_addr = RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid)
895  val readline_rr_way_addr = RegEnable(readline_r_way_addr, RegNext(io.readline.valid))
896  val readline_r_div_addr = RegEnable(line_div_addr, io.readline.valid)
897  val readline_rr_div_addr = RegEnable(readline_r_div_addr, RegNext(io.readline.valid))
898  (0 until DCacheBanks).map(i => {
       // Data uses the first-stage selectors; the error bit uses the second-stage selectors.
899    io.readline_resp(i) := bank_result(readline_r_div_addr)(i)(readline_r_way_addr)
900    readline_error_delayed(i) := bank_result(readline_rr_div_addr)(i)(readline_rr_way_addr).error_delayed
901  })
     // An error in any bank of the line is reported as a single flag.
902  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) && readline_error_delayed.asUInt.orR
903
904  // write data_banks & ecc_banks
     // Drive the write port of every (div, bank) SRAM from the registered write request.
905  for (div_index <- 0 until DCacheSetDiv) {
906    for (bank_index <- 0 until DCacheBanks) {
907      // data write
         // A bank is written when its bit in write_bank_mask_reg is set, its duplicated valid
         // register is set, its duplicated div address selects this div, and io.write.valid
         // was asserted in the previous cycle.
908      val wen_reg = write_bank_mask_reg(bank_index) &&
909        write_valid_dup_reg(bank_index) &&
910        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
         // ECC bits are computed from the incoming write data and captured when
         // io.write.valid is set.
911      val write_ecc_reg = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index))), io.write.valid)
912      val data_bank = data_banks(div_index)(bank_index)
913      data_bank.io.w.en := wen_reg
914      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
915      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
         // The stored word combines the registered ECC bits with the registered write data.
916      data_bank.io.w.data := asECCData(write_ecc_reg, write_data_reg(bank_index))
917    }
918  }
919
920  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
     // ChiselDB logging of read-read bank conflicts; table and site names carry the hart id.
921  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
922  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
923  val bankConflictData = Wire(new BankConflictDB)
     // Set index and full address are recorded for every load port unconditionally.
924  for (i <- 0 until LoadPipelineWidth) {
925    bankConflictData.set_index(i) := set_addrs(i)
926    bankConflictData.addr(i) := io.read(i).bits.addr
927  }
928
929  // FIXME: rr_bank_conflict(0)(1) no generalization
     // Only the conflict between ports 0 and 1 is logged, and the bank / way fields are taken
     // from port 0. The hard-coded indices presumably require LoadPipelineWidth >= 2 -- confirm.
     // NOTE(review): rr_bank_conflict(x)(y) already requires set_addrs(x) =/= set_addrs(y),
     // so the fake_rr_bank_conflict expression below (which requires the set addresses to be
     // equal) cannot be true inside this when-branch. Confirm the intended definition.
930  when(rr_bank_conflict(0)(1)) {
931    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
932      bankConflictData.bank_index(i) := bank_addrs(0)(i)
933    })
934    bankConflictData.way_index := OHToUInt(way_en(0))
935    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
936  }.otherwise {
       // No conflict between ports 0 and 1: the conflict-specific fields are zeroed.
937    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
938      bankConflictData.bank_index(i) := 0.U
939    })
940    bankConflictData.way_index := 0.U
941    bankConflictData.fake_rr_bank_conflict := false.B
942  }
943
     // A record is written only while the Constantin-controlled switch is non-zero and
     // ports 0 and 1 conflict.
944  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
945  bankConflictTable.log(
946    data = bankConflictData,
947    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
948    site = siteName,
949    clock = clock,
950    reset = reset
951  )
952
     // One "fake" conflict counter per port pair (x < y).
     // NOTE(review): for the same reason as above (rr_bank_conflict requires differing set
     // addresses), this condition appears to be constant false -- confirm.
953  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
954    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
955  ))
956
957  if (backendParams.debugEn){
       // Debug builds only: mark the conflict flags and the bank result / error wires with
       // dontTouch so they are not optimized away.
958    load_req_with_bank_conflict.map(dontTouch(_))
959    dontTouch(bank_result)
960    dontTouch(read_bank_error_delayed)
961  }
962}
963