xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision fc74c6e4974d29a63a39a3a1715a369e1236fa06)
1/***************************************************************************************
2* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4* Copyright (c) 2020-2021 Peng Cheng Laboratory
5*
6* XiangShan is licensed under Mulan PSL v2.
7* You can use this software according to the terms and conditions of the Mulan PSL v2.
8* You may obtain a copy of Mulan PSL v2 at:
9*          http://license.coscl.org.cn/MulanPSL2
10*
11* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14*
15* See the Mulan PSL v2 for more details.
16***************************************************************************************/
17
18package xiangshan.cache
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import utils._
23import utility._
24import chisel3.util._
25import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
26import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
27
28import scala.math.max
29
/**
  * Record written to the "BankConflict" ChiselDB table.
  *
  * One record captures what the load-pipeline read ports presented to the data
  * array in the cycle a read-read bank conflict was observed.
  */
class BankConflictDB(implicit p: Parameters) extends DCacheBundle {
  // physical address presented on each load-pipeline read port
  val addr = Vec(LoadPipelineWidth, Bits(PAddrBits.W))
  // per-port set index; width is DCacheAboveIndexOffset - DCacheSetOffset
  val set_index = Vec(LoadPipelineWidth, UInt((DCacheAboveIndexOffset - DCacheSetOffset).W))
  // one bank index per SRAM row covered by a VLEN-wide access
  val bank_index = Vec(VLEN / DCacheSRAMRowBits, UInt((DCacheSetOffset - DCacheBankOffset).W))
  // binary-encoded way
  val way_index = UInt(wayBits.W)
  // flag for a conflict that is reported although both ports use the same set
  val fake_rr_bank_conflict = Bool()
}
37
/**
  * Base read request of the banked data array: a way-enable vector plus a
  * physical address. The line-read and write request bundles extend it.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // way enable, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // physical address of the access
  val addr = Bits(PAddrBits.W)
}
43
/**
  * Word read request that additionally carries a per-bank mask.
  * This is the payload type of the load-pipeline `read` ports.
  */
class L1BankedDataReadReqWithMask(implicit p: Parameters) extends DCacheBundle {
  // way enable, one bit per DCache way
  val way_en = Bits(DCacheWays.W)
  // physical address of the access
  val addr = Bits(PAddrBits.W)
  // one bit per bank; the conflict logic ANDs the masks of two ports
  val bankMask = Bits(DCacheBanks.W)
}
50
/**
  * Cache-line read request: the base read request plus a per-bank read mask.
  */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per bank
  val rmask = Bits(DCacheBanks.W)
}
55
/**
  * Cache-line write request.
  *
  * A whole cache block can be written in a single cycle: the request carries
  * one SRAM row of data per bank together with a per-bank write mask.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per bank
  val wmask = Bits(DCacheBanks.W)
  // write data, one SRAM row per bank
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
62
/**
  * Control-only form of a cache-block write request: it carries the way
  * enable and address inherited from the base read request, but no data.
  */
class L1BankedDataWriteReqCtrl(implicit p: Parameters) extends L1BankedDataReadReq
65
/**
  * Read result of one SRAM row: the raw data, its ECC bits and an error flag
  * that becomes valid one cycle after the data.
  */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // ECC check bits read alongside the row
  val ecc = Bits(eccBits.W)
  // row data as stored in the data SRAM
  val raw_data = Bits(DCacheSRAMRowBits.W)
  // 1 cycle later than data resp
  val error_delayed = Bool()

  /** ECC code word for this row: check bits in the upper part, data in the lower part. */
  def asECCData() = Cat(ecc, raw_data)
}
77
/**
  * Write request accepted by a [[DataSRAMBank]]: enable, set address,
  * way-enable vector and one row of data.
  */
class DataSRAMBankWriteReq(implicit p: Parameters) extends DCacheBundle {
  // write enable
  val en = Bool()
  // SRAM set index; the width is left to be inferred
  val addr = UInt()
  // way enable, one bit per DCache way
  val way_en = UInt(DCacheWays.W)
  // one SRAM row of write data
  val data = UInt(DCacheSRAMRowBits.W)
}
84
/**
  * Wrapper around a single one-way, single-ported SRAM that stores one data
  * row per set.
  *
  * @param bankIdx bank number, used only in debug printing
  * @param wayIdx  way number, used only in debug printing
  */
class DataSRAM(bankIdx: Int, wayIdx: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // write port
    val w = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Input(UInt(DCacheSRAMRowBits.W))
    }

    // read port
    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(UInt(DCacheSRAMRowBits.W))
    }
  })

  // backing storage: DCacheSets / DCacheSetDiv rows of DCacheSRAMRowBits bits
  val data_sram = Module(new SRAMTemplate(
    Bits(DCacheSRAMRowBits.W),
    set = DCacheSets / DCacheSetDiv,
    way = 1,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  ))

  // read side
  data_sram.io.r.req.valid := io.r.en
  data_sram.io.r.req.bits.apply(setIdx = io.r.addr)
  io.r.data := data_sram.io.r.resp.data(0)

  // write side
  data_sram.io.w.req.valid := io.w.en
  data_sram.io.w.req.bits.apply(
    setIdx = io.w.addr,
    data = io.w.data,
    waymask = 1.U
  )

  XSPerfAccumulate("part_data_read_counter", data_sram.io.r.req.valid)

  /** Print the read data in the cycle after a read was enabled. */
  def dump_r() = when(RegNext(io.r.en)) {
    XSDebug("bank read set %x bank %x way %x data %x\n",
      RegEnable(io.r.addr, io.r.en),
      bankIdx.U,
      wayIdx.U,
      io.r.data
    )
  }

  /** Print the write request in the cycle it is enabled. */
  def dump_w() = when(io.w.en) {
    XSDebug("bank write set %x bank %x way %x data %x\n",
      io.w.addr,
      bankIdx.U,
      wayIdx.U,
      io.w.data
    )
  }

  /** Emit both the write and the read debug prints. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
149
/**
  * One data bank: DCacheWays one-way, single-ported SRAMs that share a read
  * address and a write request. A read returns the row of every way; a write
  * targets the way selected by `io.w.way_en`.
  *
  * @param index bank number (not referenced inside this module)
  */
class DataSRAMBank(index: Int)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    val w = Input(new DataSRAMBankWriteReq)

    val r = new Bundle() {
      val en = Input(Bool())
      val addr = Input(UInt())
      val data = Output(Vec(DCacheWays, UInt(DCacheSRAMRowBits.W)))
    }
  })

  // a write may enable at most one way
  assert(RegNext(!io.w.en || PopCount(io.w.way_en) <= 1.U))

  // external controls do not read and write at the same time
  val w_info = io.w
  // val rw_bypass = RegNext(io.w.addr === io.r.addr && io.w.way_en === io.r.way_en && io.w.en)

  // multiway data bank: one SRAM per way
  val data_bank = Seq.fill(DCacheWays) {
    Module(new SRAMTemplate(
      Bits(DCacheSRAMRowBits.W),
      set = DCacheSets / DCacheSetDiv,
      way = 1,
      shouldReset = false,
      holdRead = false,
      singlePort = true
    ))
  }

  data_bank.zipWithIndex.foreach { case (sram, w) =>
    // this way is written only when its way_en bit is set
    val way_wen = w_info.en && w_info.way_en(w)
    sram.io.w.req.valid := way_wen
    sram.io.w.req.bits.apply(
      setIdx = w_info.addr,
      data = w_info.data,
      waymask = 1.U
    )
    // every way is read together
    sram.io.r.req.valid := io.r.en
    sram.io.r.req.bits.apply(setIdx = io.r.addr)
    // clock the SRAM only when it is read or when this way is written
    sram.clock := ClockGate(false.B, io.r.en | (io.w.en & io.w.way_en(w)), clock)
  }
  XSPerfAccumulate("part_data_read_counter", PopCount(Cat(data_bank.map(_.io.r.req.valid))))

  io.r.data := data_bank.map(_.io.r.resp.data(0))

  /** Print the data of all ways in the cycle after a read was enabled. */
  def dump_r() = when(RegNext(io.r.en)) {
    XSDebug("bank read addr %x data %x\n",
      RegEnable(io.r.addr, io.r.en),
      io.r.data.asUInt
    )
  }

  /** Print the write request in the cycle it is enabled. */
  def dump_w() = when(io.w.en) {
    XSDebug("bank write addr %x way_en %x data %x\n",
      io.w.addr,
      io.w.way_en,
      io.w.data
    )
  }

  /** Emit both the write and the read debug prints. */
  def dump() = {
    dump_w()
    dump_r()
  }
}
220
// Marker object: AbstractBankedDataArray wraps it in an Option (DataEccParam)
// that is defined only when EnableDataEcc is set.
case object HasDataEccParam
222
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------
//
// Common IO and debug helpers shared by the data array implementations.
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  // defined only when data ECC is enabled
  val DataEccParam = if (EnableDataEcc) Some(HasDataEccParam) else None
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReqWithMask)))
    val is128Req = Input(Vec(LoadPipelineWidth, Bool()))
    // main pipeline read / write line req
    val readline_intend = Input(Bool())
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    val write_dup = Vec(DCacheBanks, Flipped(Decoupled(new L1BankedDataWriteReqCtrl)))
    // data for readline and loadpipe
    val readline_resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    val readline_error_delayed = Output(Bool())
    val read_resp          = Output(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, new L1BankedDataReadResult())))
    val read_error_delayed = Output(Vec(LoadPipelineWidth,Vec(VLEN/DCacheSRAMRowBits, Bool())))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, ValidIO(new L1CacheErrorInfo))) // read ports + readline port
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val disable_ld_fast_wakeup = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
    val cacheOp_req_dup = Vec(DCacheDupNum, Flipped(Valid(new CacheCtrlReqInfo)))
    val cacheOp_req_bits_opCode_dup = Input(Vec(DCacheDupNum, UInt(XLEN.W)))
  })

  /** Build a Vec with one element per load pipeline, element i being f(i). */
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  /** Extract the ECC bits (everything above the low wordBits) from an encoded word. */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  /** Debug-print every valid word read request and the line read request. */
  def dumpRead = {
    io.read.zipWithIndex.foreach { case (port, w) =>
      when(port.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          port.bits.way_en, port.bits.addr)
      }
    }
    when(io.readline.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        io.readline.bits.way_en, io.readline.bits.addr, io.readline.bits.rmask)
    }
  }

  /** Debug-print a valid write request, one line per bank. */
  def dumpWrite = {
    when(io.write.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        io.write.bits.way_en, io.write.bits.addr)

      (0 until DCacheBanks).foreach { r =>
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          io.write.bits.data(r), io.write.bits.wmask(r))
      }
    }
  }

  /** Debug-print the word read responses; 128-bit requests print two rows. */
  def dumpResp = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    for (r <- 0 until LoadPipelineWidth) yield {
      val resp = io.read_resp(r)
      XSDebug(s"cycle: $r data: %x\n", Mux(io.is128Req(r),
        Cat(resp(1).raw_data, resp(0).raw_data),
        resp(0).raw_data))
    }
  }

  /** Emit the read, write and response debug prints. */
  def dump() = {
    dumpRead
    dumpWrite
    dumpResp
  }
}
307
// the smallest access unit is sram
//
// Data array built from one DataSRAM per (set-div, bank, way). Load-pipeline
// word reads, main-pipeline line reads, line writes (applied one cycle after
// the request) and the customized cache-op port all share these SRAMs.
class SramedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: SramedDataArray")
  val ReduceReadlineConflict = false

  // writes are always accepted; they are registered and performed next cycle
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  // indexed as data_banks(div)(bank)(way)
  val data_banks = List.tabulate(DCacheSetDiv)( k => List.tabulate(DCacheBanks)(i => List.tabulate(DCacheWays)(j => Module(new DataSRAM(i,j)))))
  // ecc_banks is indexed exactly like data_banks: ecc_banks(div)(bank)(way).
  // Every access below uses banks(div)(bank)(way), so the lists must be nested
  // bank-outer / way-inner. Nesting them way-outer / bank-inner only elaborates
  // when DCacheWays == DCacheBanks and fails with an index error otherwise.
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.tabulate(DCacheSetDiv)( k =>
        List.tabulate(DCacheBanks)(i =>
          List.tabulate(DCacheWays)(j =>
            Module(new SRAMTemplate(
                Bits(eccBits.W),
                set = DCacheSets / DCacheSetDiv,
                way = 1,
                shouldReset = false,
                holdRead = false,
                singlePort = true
            ))
      )))
      ecc
  }

  data_banks.map(_.map(_.map(_.dump())))

  // per load port: decoded address fields of the current request
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  // when WPU is enabled, line_way_en is all enabled when read data
  val line_way_en = Fill(DCacheWays, 1.U) // val line_way_en = io.readline.bits.way_en
  val line_way_en_reg = RegEnable(io.readline.bits.way_en, 0.U(DCacheWays.W),io.readline.valid)

  // write request, registered so that the SRAM write happens one cycle later
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, 0.U(DCacheBanks.W), io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, 0.U(DCacheWays.W), x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // second row of a 128-bit access lives in the next bank
    bank_addrs(rport_index)(1) := bank_addrs(rport_index)(0) + 1.U

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })

  // read conflict
  // rr: two load ports hit the same div, overlapping banks and the same way,
  // but different sets
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U &&
    io.read(x).bits.way_en === io.read(y).bits.way_en &&
    set_addrs(x) =/= set_addrs(y)
  ))
  // rrl: a load port against the line read (same div, different set)
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
                else io.read(i).valid && line_div_addr === div_addrs(i) && line_set_addr =/= set_addrs(i)
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // wr: a load port against the registered write (same div and way, and the
  // write mask covers a bank the load touches)
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid && write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    way_en(x) === write_wayen_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // wrl: the line read against the registered write (same div)
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.map { case (x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard) }

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  val bank_conflict_fast = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach(i => {
    // port i loses a read-read conflict against every lower-numbered port
    bank_conflict_fast(i) := wr_bank_conflict(i) || rrl_bank_conflict(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
    io.bank_conflict_slow(i) := RegNext(bank_conflict_fast(i))
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // raw read results of every SRAM, indexed (div)(bank)(way)
  val read_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays,new L1BankedDataReadResult()))))
  val read_error_delayed_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(read_result)
  dontTouch(read_error_delayed_result)
  for (div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        //     Set Addr & Read Way Mask
        //
        //    Pipe 0   ....  Pipe (n-1)
        //      +      ....     +
        //      |      ....     |
        // +----+---------------+-----+
        //  X                        X
        //   X                      +------+ Bank Addr Match
        //    +---------+----------+
        //              |
        //     +--------+--------+
        //     |    Data Bank    |
        //     +-----------------+
        val loadpipe_en = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
          io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i)) && way_en(i)(way_index)
        })))
        val readline_en = Wire(Bool())
        if (ReduceReadlineConflict) {
          readline_en := io.readline.valid && io.readline.bits.rmask(bank_index) && line_way_en(way_index) && div_index.U === line_div_addr
        } else {
          readline_en := io.readline.valid && line_way_en(way_index) && div_index.U === line_div_addr
        }
        // the line read takes priority over the load ports; among load ports
        // the lowest-numbered enabled one supplies the set address
        val sram_set_addr = Mux(readline_en,
          addr_to_dcache_div_set(io.readline.bits.addr),
          PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => loadpipe_en(i) -> set_addrs(i)))
        )
        val read_en = loadpipe_en.asUInt.orR || readline_en
        // read raw data
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.r.en := read_en
        data_bank.io.r.addr := sram_set_addr
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.r.req.valid := read_en
            ecc_bank.io.r.req.bits.apply(setIdx = sram_set_addr)
            read_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(0)
          case None =>
            read_result(div_index)(bank_index)(way_index).ecc := 0.U
        }

        read_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data
        read_result_delayed(div_index)(bank_index)(way_index) := RegEnable(read_result(div_index)(bank_index)(way_index), RegNext(read_en))

        // use ECC to check error
        // the code word is registered first, so the error flag is one cycle
        // later than the data
        ecc_banks match {
          case Some(_) =>
            val ecc_data = read_result(div_index)(bank_index)(way_index).asECCData()
            val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_en))
            read_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
            read_error_delayed_result(div_index)(bank_index)(way_index) := read_result(div_index)(bank_index)(way_index).error_delayed
          case None =>
            read_result(div_index)(bank_index)(way_index).error_delayed := false.B
            read_error_delayed_result(div_index)(bank_index)(way_index) := false.B
        }
      }
    }
  }

  // flat vector of every SRAM read enable, for the perf counter only
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv * DCacheBanks * DCacheWays)(0.U(1.W))))
  for(div_index <- 0 until DCacheSetDiv){
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        data_read_oh(div_index *  DCacheBanks * DCacheWays + bank_index * DCacheWays + way_index) := data_banks(div_index)(bank_index)(way_index).io.r.en
      }
    }
  }
  XSPerfAccumulate("data_read_counter", PopCount(Cat(data_read_oh)))

  // read result: expose banked read result
  // TODO: clock gate
  (0 until LoadPipelineWidth).map(i => {
    // io.read_resp(i) := read_result(RegNext(bank_addrs(i)))(RegNext(OHToUInt(way_en(i))))
    // r_*  : request fields delayed by one cycle (select the data response)
    // rr_* : request fields delayed by two cycles (select the error response)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr  = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    // NOTE(review): unlike its siblings this one is a plain RegNext rather
    // than a RegEnable on fire - confirm that is intended
    val r_way_addr  = RegNext(OHToUInt(way_en(i)))
    val rr_read_fire = RegNext(RegNext(io.read(i).fire))
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).map( j =>{
      io.read_resp(i)(j) := read_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // normal read ports
      io.read_error_delayed(i)(j) := rr_read_fire && read_error_delayed_result(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // readline port
  (0 until DCacheBanks).map(i => {
    io.readline_resp(i) := read_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en),io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      for (way_index <- 0 until DCacheWays) {
        // data write
        // each bank uses its own duplicated copy of the registered controls
        val wen_reg = write_bank_mask_reg(bank_index) &&
          write_valid_dup_reg(bank_index) &&
          write_div_addr_dup_reg(bank_index) === div_index.U &&
          write_wayen_dup_reg(bank_index)(way_index)
        val data_bank = data_banks(div_index)(bank_index)(way_index)
        data_bank.io.w.en := wen_reg

        data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
        data_bank.io.w.data := write_data_reg(bank_index)
        // ecc write
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(div_index)(bank_index)(way_index)
            ecc_bank.io.w.req.valid := wen_reg
            ecc_bank.io.w.req.bits.apply(
              setIdx = write_set_addr_dup_reg(bank_index),
              data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
              waymask = 1.U
            )
            // NOTE(review): this print samples the current-cycle io.write
            // fields, while the SRAM write above uses the registered copies
            when(ecc_bank.io.w.req.valid) {
              XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
                bank_index.U,
                addr_to_dcache_div_set(io.write.bits.addr),
                getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
                io.write.bits.way_en
              )
            }
          case None => None
        }
      }
    }
  }

  // customized cache-op port; the connections made inside the `when` blocks
  // below override the normal read / write connections made above
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  val rdata_dup_vec = Seq(0,0,1,1,2,2,3,3)
  val rdataEcc_dup_vec = Seq(4,4,5,5,6,6,7,7)
  val wdata_dup_vec = Seq(8,8,9,9,10,10,11,11)
  val wdataEcc_dup_vec = Seq(12,12,13,13,14,14,15,15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  val cacheOpWayNum = io.cacheOp.req.bits.wayNum(4, 0)
  // cache op: read data
  rdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv){
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.r.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.r.addr := cacheOpSetAddr
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  // cache op: read ECC
  rdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.r.req.valid := true.B
              ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // cache op: write data
  wdata_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          val data_bank = data_banks(divIdx)(bankIdx)(wayIdx)
          data_bank.io.w.en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
          data_bank.io.w.addr := cacheOpSetAddr
          data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
          cacheOpShouldResp := true.B
        }
      }
    }
  }
  // cache op: write ECC
  wdataEcc_dup_vec.zipWithIndex.map{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      for (wayIdx <- 0 until DCacheWays) {
        when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
          ecc_banks match {
            case Some(banks) =>
              val ecc_bank = banks(divIdx)(bankIdx)(wayIdx)
              ecc_bank.io.w.req.valid := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))(wayIdx) && cacheOpDivAddr === divIdx.U
              ecc_bank.io.w.req.bits.apply(
                setIdx = cacheOpSetAddr,
                data = io.cacheOp.req.bits.write_data_ecc,
                waymask = 1.U
              )
              cacheOpShouldResp := true.B
            case None =>
              cacheOpShouldResp := true.B
          }
        }
      }
    }
  }
  // the cache-op response is produced one cycle after the request
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
    val cacheOpWayNumDivAddrReg = RegEnable(cacheOpWayNum, io.cacheOp.req.valid)
    io.cacheOp.resp.bits.read_data_vec(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).raw_data
    eccReadResult(bank_index) := read_result(cacheOpDivAddrReg)(bank_index)(cacheOpWayNumDivAddrReg).ecc
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
    0.U
  )

  // bank-conflict logging into ChiselDB
  val tableName =  "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index  := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).map(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  // logging is additionally gated by a Constantin runtime record
  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x)===set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}
688
689// the smallest access unit is bank
/**
  * DCache data array organised as `DCacheSetDiv` x `DCacheBanks` independent data SRAM banks,
  * each optionally paired with an ECC SRAM (present only when `DataEccParam` is defined).
  *
  * Behaviour visible in this class:
  *  - Writes are always accepted (`io.write.ready` is tied high); address, way mask, bank mask
  *    and data are registered and applied to the SRAMs one cycle after `io.write.valid`.
  *  - Each load pipeline read (`io.read(i)`) addresses one bank, or two consecutive banks when
  *    `io.is128Req(i)` is set. `io.readline` reads every bank of one set-division.
  *  - A read is back-pressured only by a conflict with the registered write; read-read and
  *    read-readline conflicts are reported through `io.bank_conflict_slow` and
  *    `io.disable_ld_fast_wakeup` instead of through `ready`.
  *  - Customized cache operations (`io.cacheOp`) can override the SRAM read/write ports.
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  println("  DCacheType: BankedDataArray")
  // Elaboration-time switch: when true, readline only conflicts with loads touching banks in its rmask.
  val ReduceReadlineConflict = false

  // Writes never stall at this level; they are registered below and applied one cycle later.
  io.write.ready := true.B
  io.write_dup.foreach(_.ready := true.B)

  val data_banks = List.fill(DCacheSetDiv)(List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i))))
  // One ECC SRAM per data bank, built only when DataEccParam is defined.
  val ecc_banks = DataEccParam.map {
    case _ =>
      val ecc = List.fill(DCacheSetDiv)(List.fill(DCacheBanks)(
        Module(new SRAMTemplate(
          Bits(eccBits.W),
          set = DCacheSets / DCacheSetDiv,
          way = DCacheWays,
          shouldReset = false,
          holdRead = false,
          singlePort = true
        ))
      ))
      ecc
  }

  data_banks.foreach(_.foreach(_.dump()))

  // Per-load-pipeline decoded request fields (combinational) and their registered copies.
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val div_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, Vec(VLEN/DCacheSRAMRowBits, UInt())))
  val way_en_reg = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val set_addrs_reg = Wire(Vec(LoadPipelineWidth, UInt()))

  val line_set_addr = addr_to_dcache_div_set(io.readline.bits.addr)
  val line_div_addr = addr_to_dcache_div(io.readline.bits.addr)
  val line_way_en = io.readline.bits.way_en

  // Registered write request: the SRAM write happens the cycle after io.write.valid.
  val write_bank_mask_reg = RegEnable(io.write.bits.wmask, io.write.valid)
  val write_data_reg = RegEnable(io.write.bits.data, io.write.valid)
  val write_valid_reg = RegNext(io.write.valid)
  val write_valid_dup_reg = io.write_dup.map(x => RegNext(x.valid))
  val write_wayen_dup_reg = io.write_dup.map(x => RegEnable(x.bits.way_en, x.valid))
  val write_set_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div_set(x.bits.addr), x.valid))
  val write_div_addr_dup_reg = io.write_dup.map(x => RegEnable(addr_to_dcache_div(x.bits.addr), x.valid))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = RegNext(io.write.valid) // not referenced elsewhere in this class body
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).foreach(rport_index => {
    div_addrs(rport_index) := addr_to_dcache_div(io.read(rport_index).bits.addr)
    bank_addrs(rport_index)(0) := addr_to_dcache_bank(io.read(rport_index).bits.addr)
    // A 128-bit request also reads the next bank; otherwise the second slot mirrors the first.
    bank_addrs(rport_index)(1) := Mux(io.is128Req(rport_index), bank_addrs(rport_index)(0) + 1.U, bank_addrs(rport_index)(0))
    set_addrs(rport_index) := addr_to_dcache_div_set(io.read(rport_index).bits.addr)
    set_addrs_reg(rport_index) := RegEnable(addr_to_dcache_div_set(io.read(rport_index).bits.addr), io.read(rport_index).valid)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
    way_en_reg(rport_index) := RegEnable(io.read(rport_index).bits.way_en, io.read(rport_index).valid)
  })

  // read each bank, get bank result
  // rr: two load reads hit the same set-division with overlapping bank masks.
  val rr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x => Seq.tabulate(LoadPipelineWidth)(y =>
    io.read(x).valid && io.read(y).valid &&
    div_addrs(x) === div_addrs(y) &&
    (io.read(x).bits.bankMask & io.read(y).bits.bankMask) =/= 0.U
  ))
  // rrl: a load read hits the set-division being read by readline (valid or merely intended).
  val rrl_bank_conflict = Wire(Vec(LoadPipelineWidth, Bool()))
  val rrl_bank_conflict_intend = Wire(Vec(LoadPipelineWidth, Bool()))
  (0 until LoadPipelineWidth).foreach { i =>
    val judge = if (ReduceReadlineConflict) io.read(i).valid && (io.readline.bits.rmask & io.read(i).bits.bankMask) =/= 0.U && div_addrs(i) === line_div_addr
                else io.read(i).valid && div_addrs(i)===line_div_addr
    rrl_bank_conflict(i) := judge && io.readline.valid
    rrl_bank_conflict_intend(i) := judge && io.readline_intend
  }
  // wr: a load read targets a bank that the registered write is writing this cycle.
  val wr_bank_conflict = Seq.tabulate(LoadPipelineWidth)(x =>
    io.read(x).valid &&
    write_valid_reg &&
    div_addrs(x) === write_div_addr_dup_reg.head &&
    (write_bank_mask_reg(bank_addrs(x)(0)) || write_bank_mask_reg(bank_addrs(x)(1)) && io.is128Req(x))
  )
  // wrl: readline targets the set-division that the registered write is writing this cycle.
  val wrl_bank_conflict = io.readline.valid && write_valid_reg && line_div_addr === write_div_addr_dup_reg.head
  // ready
  io.readline.ready := !(wrl_bank_conflict)
  io.read.zipWithIndex.foreach{case(x, i) => x.ready := !(wr_bank_conflict(i) || rrhazard)}

  val perf_multi_read = PopCount(io.read.map(_.valid)) >= 2.U
  (0 until LoadPipelineWidth).foreach(i => {
    // remove fake rr_bank_conflict situation in s2
    // (a conflict between two reads of the same set is "fake": both can share one SRAM read)
    val real_other_bank_conflict_reg = RegNext(wr_bank_conflict(i) || rrl_bank_conflict(i))
    val real_rr_bank_conflict_reg = (if (i == 0) 0.B else (0 until i).map{ j =>
      RegNext(rr_bank_conflict(j)(i)) && (set_addrs_reg(j) =/= set_addrs_reg(i))
    }.reduce(_ || _))
    io.bank_conflict_slow(i) := real_other_bank_conflict_reg || real_rr_bank_conflict_reg

    // get result in s1
    // Only lower-indexed pipelines are considered for rr conflicts, so pipeline 0 never loses to another load.
    io.disable_ld_fast_wakeup(i) := wr_bank_conflict(i) || rrl_bank_conflict_intend(i) ||
      (if (i == 0) 0.B else (0 until i).map(rr_bank_conflict(_)(i)).reduce(_ || _))
  })
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y))
  ))
  (0 until LoadPipelineWidth).foreach(i => {
    XSPerfAccumulate(s"data_array_rrl_bank_conflict_${i}", rrl_bank_conflict(i))
    XSPerfAccumulate(s"data_array_rw_bank_conflict_${i}", wr_bank_conflict(i))
    XSPerfAccumulate(s"data_array_read_${i}", io.read(i).valid)
  })
  XSPerfAccumulate("data_array_access_total", PopCount(io.read.map(_.valid)))
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  // Raw per-(div, bank, way) read results, plus registered copies and registered ECC error flags.
  val bank_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val bank_result_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, new L1BankedDataReadResult()))))
  val ecc_result = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, UInt(eccBits.W)))))
  val read_bank_error_delayed = Wire(Vec(DCacheSetDiv, Vec(DCacheBanks, Vec(DCacheWays, Bool()))))
  dontTouch(bank_result)
  dontTouch(read_bank_error_delayed)
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      //     Set Addr & Read Way Mask
      //
      //    Pipe 0   ....  Pipe (n-1)
      //      +      ....     +
      //      |      ....     |
      // +----+---------------+-----+
      //  X                        X
      //   X                      +------+ Bank Addr Match
      //    +---------+----------+
      //              |
      //     +--------+--------+
      //     |    Data Bank    |
      //     +-----------------+
      val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
        io.read(i).valid && div_addrs(i) === div_index.U && (bank_addrs(i)(0) === bank_index.U || bank_addrs(i)(1) === bank_index.U && io.is128Req(i))
      })))
      val readline_match = Wire(Bool())
      if (ReduceReadlineConflict) {
        readline_match := io.readline.valid && io.readline.bits.rmask(bank_index) && line_div_addr === div_index.U
      } else {
        readline_match := io.readline.valid && line_div_addr === div_index.U
      }

      // Readline has priority over load reads; among loads the lowest-indexed matching pipeline wins.
      val bank_set_addr = Mux(readline_match,
        line_set_addr,
        PriorityMux(Seq.tabulate(LoadPipelineWidth)(i => bank_addr_matchs(i) -> set_addrs(i)))
      )
      val read_enable = bank_addr_matchs.asUInt.orR || readline_match

      // read raw data
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.r.en := read_enable
      data_bank.io.r.addr := bank_set_addr
      for (way_index <- 0 until DCacheWays) {
        bank_result(div_index)(bank_index)(way_index).raw_data := data_bank.io.r.data(way_index)
        bank_result_delayed(div_index)(bank_index)(way_index) := RegEnable(bank_result(div_index)(bank_index)(way_index), RegNext(read_enable))
      }

      // read ECC
      ecc_banks match {
        case Some(banks) =>
          val ecc_bank = banks(div_index)(bank_index)
          ecc_bank.io.r.req.valid := read_enable
          ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
          ecc_result(div_index)(bank_index) := ecc_bank.io.r.resp.data
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).ecc := ecc_bank.io.r.resp.data(way_index)
          }
        case None =>
          ecc_result(div_index)(bank_index) := DontCare
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).ecc := DontCare
          }
      }

      // use ECC to check error
      ecc_banks match {
        case Some(_) =>
          for (way_index <- 0 until DCacheWays) {
            // Data + ECC are registered first, so the decode result is available one cycle after the read data.
            val ecc_data = bank_result(div_index)(bank_index)(way_index).asECCData()
            val ecc_data_delayed = RegEnable(ecc_data, RegNext(read_enable))
            bank_result(div_index)(bank_index)(way_index).error_delayed := dcacheParameters.dataCode.decode(ecc_data_delayed).error
            read_bank_error_delayed(div_index)(bank_index)(way_index) := bank_result(div_index)(bank_index)(way_index).error_delayed
          }
        case None =>
          for (way_index <- 0 until DCacheWays) {
            bank_result(div_index)(bank_index)(way_index).error_delayed := false.B
            read_bank_error_delayed(div_index)(bank_index)(way_index) := false.B
          }
      }
    }
  }

  // Perf counter: every enabled bank read is counted as DCacheWays accesses.
  val data_read_oh = WireInit(VecInit(Seq.fill(DCacheSetDiv)(0.U(XLEN.W))))
  for (div_index <- 0 until DCacheSetDiv){
    val temp = WireInit(VecInit(Seq.fill(DCacheBanks)(0.U(XLEN.W))))
    for (bank_index <- 0 until DCacheBanks) {
      temp(bank_index) := PopCount(Fill(DCacheWays, data_banks(div_index)(bank_index).io.r.en.asUInt))
    }
    data_read_oh(div_index) := temp.reduce(_ + _)
  }
  XSPerfAccumulate("data_read_counter", data_read_oh.foldLeft(0.U)(_ + _))

  (0 until LoadPipelineWidth).foreach(i => {
    // 1 cycle after read fire(load s2)
    val r_read_fire = RegNext(io.read(i).fire)
    val r_div_addr = RegEnable(div_addrs(i), io.read(i).fire)
    val r_bank_addr = RegEnable(bank_addrs(i), io.read(i).fire)
    val r_way_addr = RegEnable(OHToUInt(way_en(i)), io.read(i).fire)
    // 2 cycles after read fire(load s3)
    val rr_read_fire = RegNext(r_read_fire)
    val rr_div_addr = RegEnable(RegEnable(div_addrs(i), io.read(i).fire), r_read_fire)
    val rr_bank_addr = RegEnable(RegEnable(bank_addrs(i), io.read(i).fire), r_read_fire)
    val rr_way_addr = RegEnable(RegEnable(OHToUInt(way_en(i)), io.read(i).fire), r_read_fire)
    (0 until VLEN/DCacheSRAMRowBits).foreach( j =>{
      io.read_resp(i)(j)          := bank_result(r_div_addr)(r_bank_addr(j))(r_way_addr)
      // error detection
      // The error is suppressed when the read was reported as a (slow) bank conflict.
      io.read_error_delayed(i)(j) := rr_read_fire && read_bank_error_delayed(rr_div_addr)(rr_bank_addr(j))(rr_way_addr) && !RegNext(io.bank_conflict_slow(i))
    })
  })

  // read result: expose banked read result
  (0 until DCacheBanks).foreach(i => {
    io.readline_resp(i) := bank_result(RegEnable(line_div_addr, io.readline.valid))(i)(RegEnable(OHToUInt(io.readline.bits.way_en), io.readline.valid))
  })
  io.readline_error_delayed := RegNext(RegNext(io.readline.fire)) &&
    VecInit((0 until DCacheBanks).map(i => io.readline_resp(i).error_delayed)).asUInt.orR

  // write data_banks & ecc_banks
  for (div_index <- 0 until DCacheSetDiv) {
    for (bank_index <- 0 until DCacheBanks) {
      // data write
      val wen_reg = write_bank_mask_reg(bank_index) &&
        write_valid_dup_reg(bank_index) &&
        write_div_addr_dup_reg(bank_index) === div_index.U && RegNext(io.write.valid)
      val data_bank = data_banks(div_index)(bank_index)
      data_bank.io.w.en := wen_reg
      data_bank.io.w.way_en := write_wayen_dup_reg(bank_index)
      data_bank.io.w.addr := write_set_addr_dup_reg(bank_index)
      data_bank.io.w.data := write_data_reg(bank_index)

      // ecc write
      ecc_banks match {
        case Some(banks) =>
          val ecc_bank = banks(div_index)(bank_index)
          ecc_bank.io.w.req.valid := wen_reg
          ecc_bank.io.w.req.bits.apply(
            setIdx = write_set_addr_dup_reg(bank_index),
            data = RegEnable(getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))), io.write.valid),
            waymask = write_wayen_dup_reg(bank_index)
          )
          // NOTE(review): this prints the current-cycle io.write.bits fields, while the SRAM
          // write above uses the registered copies - confirm this is the intended debug output.
          when(ecc_bank.io.w.req.valid) {
            XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
              bank_index.U,
              addr_to_dcache_div_set(io.write.bits.addr),
              getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
              io.write.bits.way_en
            )
          }
        case None =>
      }
    }
  }

  // deal with customized cache op
  // The connections below are made after the normal read/write wiring, so they take
  // precedence (last connect wins) whenever their `when` conditions hold.
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  // DCacheDupNum is 16
  // vec: the dupIdx for every bank and every group
  val rdata_dup_vec = Seq(0, 0, 1, 1, 2, 2, 3, 3)
  val rdataEcc_dup_vec = Seq(4, 4, 5, 5, 6, 6, 7, 7)
  val wdata_dup_vec = Seq(8, 8, 9, 9, 10, 10, 11, 11)
  val wdataEcc_dup_vec = Seq(12, 12, 13, 13, 14, 14, 15, 15)
  val cacheOpDivAddr = set_to_dcache_div(io.cacheOp.req.bits.index)
  val cacheOpSetAddr = set_to_dcache_div_set(io.cacheOp.req.bits.index)
  // One-hot way mask (32 bits wide, hence the nWays <= 32 requirement above).
  val cacheOpWayMask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
  rdata_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        // Every set-division is read; the requested one is selected when the response is formed.
        val data_bank = data_banks(divIdx)(bankIdx)
        data_bank.io.r.en := true.B
        data_bank.io.r.addr := cacheOpSetAddr
        cacheOpShouldResp := true.B
      }
    }
  }
  rdataEcc_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isReadDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(divIdx)(bankIdx)
            ecc_bank.io.r.req.valid := true.B
            ecc_bank.io.r.req.bits.setIdx := cacheOpSetAddr
            cacheOpShouldResp := true.B
          case None =>
            cacheOpShouldResp := true.B
        }
      }
    }
  }
  wdata_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteData(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        val data_bank = data_banks(divIdx)(bankIdx)
        data_bank.io.w.en := cacheOpDivAddr === divIdx.U
        data_bank.io.w.way_en := cacheOpWayMask
        data_bank.io.w.addr := cacheOpSetAddr
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bankIdx)
        cacheOpShouldResp := true.B
      }
    }
  }
  wdataEcc_dup_vec.zipWithIndex.foreach{ case(dupIdx, bankIdx) =>
    for (divIdx <- 0 until DCacheSetDiv) {
      when(io.cacheOp_req_dup(dupIdx).valid && CacheInstrucion.isWriteDataECC(io.cacheOp_req_bits_opCode_dup(dupIdx))) {
        ecc_banks match {
          case Some(banks) =>
            val ecc_bank = banks(divIdx)(bankIdx)
            ecc_bank.io.w.req.valid := cacheOpDivAddr === divIdx.U
            ecc_bank.io.w.req.bits.apply(
              setIdx = cacheOpSetAddr,
              data = io.cacheOp.req.bits.write_data_ecc,
              waymask = cacheOpWayMask
            )
            cacheOpShouldResp := true.B
          case None =>
            cacheOpShouldResp := true.B
        }
      }
    }
  }

  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    val cacheOpDivAddrReg = RegEnable(cacheOpDivAddr, io.cacheOp.req.valid)
    val cacheOpWayMaskReg = RegEnable(cacheOpWayMask, io.cacheOp.req.valid)
    // cacheOpWayMaskReg is a one-hot mask, not a binary way index, so the way must be
    // selected with Mux1H (as the ECC path below does) rather than by indexing the Vec.
    io.cacheOp.resp.bits.read_data_vec(bank_index) := Mux1H(
      cacheOpWayMaskReg,
      bank_result(cacheOpDivAddrReg)(bank_index).map(_.raw_data)
    )
    eccReadResult(bank_index) := Mux1H(cacheOpWayMaskReg, ecc_result(cacheOpDivAddrReg)(bank_index))
  }

  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegEnable(io.cacheOp.req.bits.bank_num, io.cacheOp.req.valid)),
    0.U
  )

  // ChiselDB logging of read-read bank conflicts between load pipelines 0 and 1.
  val tableName = "BankConflict" + p(XSCoreParamsKey).HartId.toString
  val siteName = "BankedDataArray" + p(XSCoreParamsKey).HartId.toString
  val bankConflictTable = ChiselDB.createTable(tableName, new BankConflictDB)
  val bankConflictData = Wire(new BankConflictDB)
  for (i <- 0 until LoadPipelineWidth) {
    bankConflictData.set_index(i) := set_addrs(i)
    bankConflictData.addr(i) := io.read(i).bits.addr
  }

  // FIXME: rr_bank_conflict(0)(1) no generalization
  when(rr_bank_conflict(0)(1)) {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := bank_addrs(0)(i)
    })
    bankConflictData.way_index := OHToUInt(way_en(0))
    bankConflictData.fake_rr_bank_conflict := set_addrs(0) === set_addrs(1) && div_addrs(0) === div_addrs(1)
  }.otherwise {
    (0 until (VLEN/DCacheSRAMRowBits)).foreach(i => {
      bankConflictData.bank_index(i) := 0.U
    })
    bankConflictData.way_index := 0.U
    bankConflictData.fake_rr_bank_conflict := false.B
  }

  val isWriteBankConflictTable = Constantin.createRecord(s"isWriteBankConflictTable${p(XSCoreParamsKey).HartId}")
  bankConflictTable.log(
    data = bankConflictData,
    en = isWriteBankConflictTable.orR && rr_bank_conflict(0)(1),
    site = siteName,
    clock = clock,
    reset = reset
  )

  (1 until LoadPipelineWidth).foreach(y => (0 until y).foreach(x =>
    XSPerfAccumulate(s"data_array_fake_rr_bank_conflict_${x}_${y}", rr_bank_conflict(x)(y) && set_addrs(x) === set_addrs(y) && div_addrs(x) === div_addrs(y))
  ))

}
1076