xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 066ac8a465b27b54ba22458ff1a67bcd28215d73)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
23import utils.{Code, ParallelOR, ReplacementPolicy, SRAMTemplate, XSDebug, XSPerfAccumulate}
24import xiangshan.L1CacheErrorInfo
25
26import scala.math.max
27
/**
  * Base request bundle for the banked data array: a way-enable mask plus an address.
  *
  * Used directly by the load-pipeline read ports and extended by the line-read
  * and write request bundles.
  */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // Way-enable mask, DCacheWays bits wide.
  val way_en = Bits(DCacheWays.W)
  // Request address, PAddrBits bits wide.
  val addr = Bits(PAddrBits.W)
}
33
/**
  * Line read request: the base way-enable/address request extended with a
  * per-bank read mask.
  */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // Read mask, DCacheBanks bits wide (one bit per data bank).
  val rmask = Bits(DCacheBanks.W)
}
38
/**
  * Write request for the banked data array.
  *
  * Carries one SRAM row per bank, so a whole cache block can be written in a
  * single cycle.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // Write mask, DCacheBanks bits wide (one bit per data bank).
  val wmask = Bits(DCacheBanks.W)
  // Write data: one DCacheSRAMRowBits-wide row for each bank.
  val data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
}
45
/**
  * Read result of a single data bank: the raw data row together with its ECC bits.
  */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // you can choose which bank to read to save power
  // ECC bits, eccBits wide.
  val ecc = Bits(eccBits.W)
  // Raw data row, DCacheSRAMRowBits wide.
  val raw_data = Bits(DCacheSRAMRowBits.W)

  /** Concatenates the ECC bits (upper part) with the raw data (lower part). */
  def asECCData() = Cat(ecc, raw_data)
}
56
//                     Banked DCache Data
// -----------------------------------------------------------------
// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
// -----------------------------------------------------------------
// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
// -----------------------------------------------------------------

/**
  * Common IO and debug-dump helpers for banked DCache data array implementations.
  *
  * Declares the word read ports, the line read port, the write port, the
  * per-bank response, the error and bank-conflict outputs and the customized
  * cache-op port. Concrete subclasses provide the storage and drive the outputs.
  */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  val io = IO(new DCacheBundle {
    // load pipeline read word req
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // main pipeline read / write line req
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // data bank read resp (all banks)
    val resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    val errors = Output(Vec(LoadPipelineWidth, new L1CacheErrorInfo))
    // when bank_conflict, read (1) port should be ignored
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new DCacheInnerOpIO)
  })
  assert(LoadPipelineWidth == 2) // BankedDataArray is designed for 2 port

  /** Builds a Vec with one element per load pipeline by applying `f` to each pipeline index. */
  def pipeMap[T <: Data](f: Int => T) = VecInit((0 until LoadPipelineWidth).map(f))

  /** Emits a debug line for every valid word read request and for a valid line read request. */
  def dumpRead() = {
    for (w <- 0 until LoadPipelineWidth) {
      val req = io.read(w)
      when(req.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          req.bits.way_en, req.bits.addr)
      }
    }
    val line = io.readline
    when(line.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        line.bits.way_en, line.bits.addr, line.bits.rmask)
    }
  }

  /** Emits debug lines for a valid write request: a header, then data and mask bit for each bank. */
  def dumpWrite() = {
    val wr = io.write
    when(wr.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        wr.bits.way_en, wr.bits.addr)

      // `r` is the bank index; the message text labels it "cycle".
      for (r <- 0 until DCacheBanks) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          wr.bits.data(r), wr.bits.wmask(r))
      }
    }
  }

  /** Emits the raw response data of every bank (unconditionally). */
  def dumpResp() = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    // `r` is the bank index; the message text labels it "cycle".
    for (r <- 0 until DCacheBanks) yield
      XSDebug(s"cycle: $r data: %x\n", io.resp(r).raw_data)
  }

  /** Emits the read, write and response debug dumps, in that order. */
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
125
/**
  * Banked DCache data array built from single-port SRAMs.
  *
  * Each of the DCacheBanks banks consists of a `DataSRAMBank` (one single-way
  * SRAM per cache way) holding the raw data rows and an `SRAMTemplate` holding
  * the ECC bits of all ways. The module serves:
  *  - two load-pipeline word reads (port 1 yields to port 0 on a bank conflict),
  *  - a line read, which takes priority over word reads on the banks it claims,
  *  - a line write, which blocks all reads in the same cycle,
  *  - customized cache ops that read/write the data and ECC arrays directly.
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  /**
    * Returns bits [encWordBits - 1 : wordBits] of an encoded word, i.e. everything
    * above the data word.
    *
    * @param encWord encoded word; its width must equal encWordBits
    */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // When true, a line read only claims / conflicts with the banks selected by its rmask;
  // when false, any valid line read claims every bank.
  val ReduceReadlineConflict = false

  // Writes are always accepted; reads are held off instead (see rwhazard below).
  io.write.ready := true.B

  // Wraps the data rows of all ways of one bank: one single-way SRAM per cache way.
  class DataSRAMBank(index: Int) extends Module {
    val io = IO(new Bundle() {
      val w = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Input(UInt(DCacheSRAMRowBits.W))
      }

      val r = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Output(UInt(DCacheSRAMRowBits.W))
      }
    })

    // The way select is registered so that it is applied to the SRAM outputs one
    // cycle after the read request (presumably when the SRAM read data is available).
    val r_way_en_reg = RegNext(io.r.way_en)

    // multiway data bank
    val data_bank = Array.fill(DCacheWays) {
      Module(new SRAMTemplate(
        Bits(DCacheSRAMRowBits.W),
        set = DCacheSets,
        way = 1,
        shouldReset = false,
        holdRead = false,
        singlePort = true
      ))
    }

    for (w <- 0 until DCacheWays) {
      // Only the ways selected by w.way_en are written; every way is read on r.en
      // and the requested way is selected afterwards.
      val wen = io.w.en && io.w.way_en(w)
      data_bank(w).io.w.req.valid := wen
      data_bank(w).io.w.req.bits.apply(
        setIdx = io.w.addr,
        data = io.w.data,
        waymask = 1.U
      )
      data_bank(w).io.r.req.valid := io.r.en
      data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    }

    // Two-level way select: a one-hot mux inside each half of the ways, then a
    // 2:1 mux between the halves.
    // NOTE(review): assumes nWays is even and equal to DCacheWays -- confirm.
    val half = nWays / 2
    val data_read = data_bank.map(_.io.r.resp.data(0))
    val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
    val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

    val sel_low = r_way_en_reg.tail(half).orR()
    val row_data = Mux(sel_low, data_left, data_right)

    io.r.data := row_data

    def dump_r() = {
      when(RegNext(io.r.en)) {
        XSDebug("bank read addr %x way_en %x data %x\n",
          RegNext(io.r.addr),
          RegNext(io.r.way_en),
          io.r.data
        )
      }
    }

    def dump_w() = {
      when(io.w.en) {
        XSDebug("bank write addr %x way_en %x data %x\n",
          io.w.addr,
          io.w.way_en,
          io.w.data
        )
      }
    }

    def dump() = {
      dump_w()
      dump_r()
    }
  }

  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets,
    way = DCacheWays,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  )))

  data_banks.foreach(_.dump())

  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  val way_en_reg = RegNext(way_en)
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = io.write.valid
  val rrhazard = false.B // io.readline.valid
  for (rport_index <- 0 until LoadPipelineWidth) {
    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    io.read(rport_index).ready := !(rwhazard || rrhazard)

    // use way_en to select a way after data read out
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  }
  io.readline.ready := !(rwhazard)

  // read each bank, get bank result
  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
  dontTouch(bank_result)
  val row_error = Wire(Vec(DCacheBanks, Bool()))
  dontTouch(row_error)
  // Both word reads target the same bank in the same cycle.
  val rr_bank_conflict = bank_addrs(0) === bank_addrs(1) && io.read(0).valid && io.read(1).valid
  // A word read collides with a line read.
  val rrl_bank_conflict_0 = Wire(Bool())
  val rrl_bank_conflict_1 = Wire(Bool())
  if (ReduceReadlineConflict) {
    rrl_bank_conflict_0 := io.read(0).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(0))
    rrl_bank_conflict_1 := io.read(1).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(1))
  } else {
    rrl_bank_conflict_0 := io.read(0).valid && io.readline.valid
    rrl_bank_conflict_1 := io.read(1).valid && io.readline.valid
  }

  // A word read collides with a write.
  val rw_bank_conflict_0 = io.read(0).valid && rwhazard
  val rw_bank_conflict_1 = io.read(1).valid && rwhazard
  val perf_multi_read = io.read(0).valid && io.read(1).valid
  // Port 0 wins a read-read conflict, so rr_bank_conflict is only reported on port 1.
  // The "slow" outputs are the "fast" ones delayed by one cycle.
  io.bank_conflict_fast(0) := rw_bank_conflict_0 || rrl_bank_conflict_0
  io.bank_conflict_slow(0) := RegNext(io.bank_conflict_fast(0))
  io.bank_conflict_fast(1) := rw_bank_conflict_1 || rrl_bank_conflict_1 || rr_bank_conflict
  io.bank_conflict_slow(1) := RegNext(io.bank_conflict_fast(1))
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  XSPerfAccumulate("data_array_rr_bank_conflict", rr_bank_conflict)
  XSPerfAccumulate("data_array_rrl_bank_conflict_0", rrl_bank_conflict_0)
  XSPerfAccumulate("data_array_rrl_bank_conflict_1", rrl_bank_conflict_1)
  XSPerfAccumulate("data_array_rw_bank_conflict_0", rw_bank_conflict_0)
  XSPerfAccumulate("data_array_rw_bank_conflict_1", rw_bank_conflict_1)
  XSPerfAccumulate("data_array_access_total", io.read(0).valid +& io.read(1).valid)
  XSPerfAccumulate("data_array_read_0", io.read(0).valid)
  XSPerfAccumulate("data_array_read_1", io.read(1).valid)
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  for (bank_index <- 0 until DCacheBanks) {
    //     Set Addr & Read Way Mask
    //
    //      Pipe 0      Pipe 1
    //        +           +
    //        |           |
    // +------+-----------+-------+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      bank_addrs(i) === bank_index.U && io.read(i).valid
    })))
    val readline_match = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
    } else {
      readline_match := io.readline.valid
    }
    // Priority for this bank: line read, then load pipe 0, then load pipe 1.
    val bank_way_en = Mux(readline_match,
      io.readline.bits.way_en,
      Mux(bank_addr_matchs(0), way_en(0), way_en(1))
    )
    val bank_set_addr = Mux(readline_match,
      addr_to_dcache_set(io.readline.bits.addr),
      Mux(bank_addr_matchs(0), set_addrs(0), set_addrs(1))
    )

    // read raw data
    val data_bank = data_banks(bank_index)
    data_bank.io.r.en := bank_addr_matchs.asUInt.orR || readline_match
    data_bank.io.r.way_en := bank_way_en
    data_bank.io.r.addr := bank_set_addr
    bank_result(bank_index).raw_data := data_bank.io.r.data

    // read ECC (only for word reads; a line read does not enable the ECC bank)
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.r.req.valid := bank_addr_matchs.asUInt.orR
    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)

    // use ECC to check error
    val data = bank_result(bank_index).asECCData()
    row_error(bank_index) := dcacheParameters.dataCode.decode(data).error && RegNext(bank_addr_matchs.asUInt.orR)
  }

  // Select final read result
  for (rport_index <- 0 until LoadPipelineWidth) {
    // An error in any bank is reported on every port whose read fired in the previous cycle.
    io.errors(rport_index).ecc_error.valid := RegNext(io.read(rport_index).fire()) && row_error.asUInt.orR()
    io.errors(rport_index).ecc_error.bits := true.B
    io.errors(rport_index).paddr.valid := io.errors(rport_index).ecc_error.valid
    io.errors(rport_index).paddr.bits := RegNext(io.read(rport_index).bits.addr)
  }
  io.resp := bank_result

  // write data_banks & ecc_banks
  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
  for (bank_index <- 0 until DCacheBanks) {
    val bank_wen = io.write.valid && io.write.bits.wmask(bank_index)

    // data write
    val data_bank = data_banks(bank_index)
    data_bank.io.w.en := bank_wen
    data_bank.io.w.way_en := io.write.bits.way_en
    data_bank.io.w.addr := sram_waddr
    data_bank.io.w.data := io.write.bits.data(bank_index)

    // ecc write: the check bits are computed from the row being written
    val write_ecc = getECCFromEncWord(cacheParams.dataCode.encode(io.write.bits.data(bank_index)))
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.w.req.valid := bank_wen
    ecc_bank.io.w.req.bits.apply(
      setIdx = sram_waddr,
      data = write_ecc,
      waymask = io.write.bits.way_en
    )
    when(ecc_bank.io.w.req.valid) {
      XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
        bank_index.U,
        sram_waddr,
        write_ecc,
        io.write.bits.way_en
      )
    }
  }

  // deal with customized cache op
  // The connections below come after the normal read/write wiring and therefore
  // override it whenever a cache op request is valid.
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  // One-hot way select of the cache op (wayNum is at most 5 bits, see require above).
  val cacheOpWayOH = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
  when(io.cacheOp.req.valid) {
    when(
      CacheInstrucion.isReadData(io.cacheOp.req.bits.opCode) ||
      CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)
    ) {
      for (data_bank <- data_banks) {
        data_bank.io.r.en := true.B
        data_bank.io.r.way_en := cacheOpWayOH
        data_bank.io.r.addr := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
    // An ECC read must actually read the ECC SRAMs at the requested set; without
    // this the response would carry whatever the ECC banks last output.
    when(CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)) {
      for (ecc_bank <- ecc_banks) {
        ecc_bank.io.r.req.valid := true.B
        ecc_bank.io.r.req.bits.apply(setIdx = io.cacheOp.req.bits.index)
      }
    }
    when(CacheInstrucion.isWriteData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.w.en := true.B
        data_bank.io.w.way_en := cacheOpWayOH
        data_bank.io.w.addr := io.cacheOp.req.bits.index
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bank_index)
      }
      cacheOpShouldResp := true.B
    }
    when(CacheInstrucion.isWriteDataECC(io.cacheOp.req.bits.opCode)) {
      for (ecc_bank <- ecc_banks) {
        ecc_bank.io.w.req.valid := true.B
        ecc_bank.io.w.req.bits.apply(
          setIdx = io.cacheOp.req.bits.index,
          data = io.cacheOp.req.bits.write_data_ecc,
          waymask = cacheOpWayOH
        )
      }
      cacheOpShouldResp := true.B
    }
  }
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(bank_index).raw_data
  }
  // The response is produced one cycle after the request, so the way and bank
  // selects are registered to stay aligned with resp.valid. The way is taken from
  // the cache op itself rather than from the load/readline way mask that drives
  // bank_result(...).ecc.
  val cacheOpWayOHReg = RegNext(cacheOpWayOH)
  val cacheOpEccResult = VecInit(ecc_banks.map(bank => Mux1H(cacheOpWayOHReg, bank.io.r.resp.data)))
  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    cacheOpEccResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )
}
420