xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/data/BankedDataArray.scala (revision 12e221b1295bd5e50822d86f9ccd637ebeaffc2f)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import utils._
22import chisel3.util._
23import freechips.rocketchip.tilelink.{ClientMetadata, TLClientParameters, TLEdgeOut}
24import utils.{Code, ParallelOR, ReplacementPolicy, SRAMTemplate, XSDebug, XSPerfAccumulate}
25import xiangshan.L1CacheErrorInfo
26
27import scala.math.max
28
/** Read request presented to the banked data array by a word-read port. */
class L1BankedDataReadReq(implicit p: Parameters) extends DCacheBundle {
  // way-enable mask, one bit per DCache way
  val way_en = UInt(DCacheWays.W)
  // request address, PAddrBits wide
  val addr = UInt(PAddrBits.W)
}
34
/** Line-read request: a [[L1BankedDataReadReq]] extended with a per-bank read mask. */
class L1BankedDataReadLineReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // one bit per data bank
  val rmask = UInt(DCacheBanks.W)
}
39
40// Now, we can write a cache-block in a single cycle
/** Write request: reuses the way_en / addr fields of [[L1BankedDataReadReq]]
  * and adds one data row plus one write-mask bit for every bank.
  */
class L1BankedDataWriteReq(implicit p: Parameters) extends L1BankedDataReadReq {
  // per-bank write enable, one bit per data bank
  val wmask = UInt(DCacheBanks.W)
  // one SRAM row of write data for each bank
  val data = Vec(DCacheBanks, UInt(DCacheSRAMRowBits.W))
}
46
/** Read result of a single data bank: the raw row, its ECC bits and an error flag. */
class L1BankedDataReadResult(implicit p: Parameters) extends DCacheBundle {
  // you can choose which bank to read to save power
  val ecc = UInt(eccBits.W)
  val raw_data = UInt(DCacheSRAMRowBits.W)
  // slow to generate, use it with care
  val error = Bool()

  /** Concatenates the ECC bits (upper part) with the raw data (lower part). */
  def asECCData() = Cat(ecc, raw_data)
}
58
59//                     Banked DCache Data
60// -----------------------------------------------------------------
61// | Bank0 | Bank1 | Bank2 | Bank3 | Bank4 | Bank5 | Bank6 | Bank7 |
62// -----------------------------------------------------------------
63// | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  | Way0  |
64// | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  | Way1  |
65// | ....  | ....  | ....  | ....  | ....  | ....  | ....  | ....  |
66// -----------------------------------------------------------------
/** Common interface and debug helpers shared by banked DCache data array implementations. */
abstract class AbstractBankedDataArray(implicit p: Parameters) extends DCacheModule {
  // NOTE(review): presumably the slot of the readline port when errors are listed as
  // "read ports + readline port" -- it is not used inside this class
  val ReadlinePortErrorIndex = LoadPipelineWidth
  val io = IO(new DCacheBundle {
    // word read requests coming from the load pipelines
    val read = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new L1BankedDataReadReq)))
    // line read / line write requests coming from the main pipeline
    val readline = Flipped(DecoupledIO(new L1BankedDataReadLineReq))
    val write = Flipped(DecoupledIO(new L1BankedDataWriteReq))
    // read response of every data bank
    val resp = Output(Vec(DCacheBanks, new L1BankedDataReadResult()))
    // val nacks = Output(Vec(LoadPipelineWidth, Bool()))
    // val errors = Output(Vec(LoadPipelineWidth + 1, new L1CacheErrorInfo)) // read ports + readline port
    val read_error = Output(Vec(LoadPipelineWidth, Bool()))
    val readline_error = Output(Bool())
    // a read port flagged with bank_conflict must be ignored by its consumer
    val bank_conflict_slow = Output(Vec(LoadPipelineWidth, Bool()))
    val bank_conflict_fast = Output(Vec(LoadPipelineWidth, Bool()))
    // customized cache op port
    val cacheOp = Flipped(new L1CacheInnerOpIO)
  })
  // BankedDataArray is designed for no more than 2 read ports
  assert(LoadPipelineWidth <= 2)

  /** Builds a Vec holding `f(i)` for every load pipeline index `i`. */
  def pipeMap[T <: Data](f: Int => T) = VecInit(Seq.tabulate(LoadPipelineWidth)(f))

  /** Prints every valid word-read request and a valid line-read request. */
  def dumpRead() = {
    for (w <- 0 until LoadPipelineWidth) {
      val port = io.read(w)
      when(port.valid) {
        XSDebug(s"DataArray Read channel: $w valid way_en: %x addr: %x\n",
          port.bits.way_en, port.bits.addr)
      }
    }
    val line = io.readline
    when(line.valid) {
      XSDebug(s"DataArray Read Line, valid way_en: %x addr: %x rmask %x\n",
        line.bits.way_en, line.bits.addr, line.bits.rmask)
    }
  }

  /** Prints a valid write request together with the data / mask bit of each bank. */
  def dumpWrite() = {
    val wr = io.write
    when(wr.valid) {
      XSDebug(s"DataArray Write valid way_en: %x addr: %x\n",
        wr.bits.way_en, wr.bits.addr)

      for (r <- 0 until DCacheBanks) {
        XSDebug(s"cycle: $r data: %x wmask: %x\n",
          wr.bits.data(r), wr.bits.wmask(r))
      }
    }
  }

  /** Prints the raw data currently driven on every bank of the response port. */
  def dumpResp() = {
    XSDebug(s"DataArray ReadeResp channel:\n")
    for (r <- 0 until DCacheBanks) yield {
      XSDebug(s"cycle: $r data: %x\n", io.resp(r).raw_data)
    }
  }

  /** Emits the read, write and response debug prints, in that order. */
  def dump() = {
    dumpRead()
    dumpWrite()
    dumpResp()
  }
}
130
/** Banked DCache data array built from single-port SRAMs.
  *
  * Data is held in DCacheBanks instances of DataSRAMBank (one SRAM per way inside
  * each bank); ECC bits are held in one multi-way SRAM per bank. Requests come from
  * the load read ports, the readline port, the write port and the cacheOp port
  * declared in AbstractBankedDataArray.
  *
  * The default read / write connections are made first and the cacheOp block at
  * the end re-assigns some of the same signals inside `when` scopes, so the order
  * of the statements in this class matters (Chisel last-connect semantics).
  */
class BankedDataArray(implicit p: Parameters) extends AbstractBankedDataArray {
  /** Extracts the check bits, i.e. bits [encWordBits - 1 : wordBits], of an encoded word.
    * The argument must be exactly encWordBits wide.
    */
  def getECCFromEncWord(encWord: UInt) = {
    require(encWord.getWidth == encWordBits)
    encWord(encWordBits - 1, wordBits)
  }

  // Elaboration-time switch. When false, a valid readline conflicts with every valid
  // load read and drives the read of every bank; when true, only the banks selected
  // by readline rmask are considered.
  val ReduceReadlineConflict = false

  // writes are always accepted
  io.write.ready := true.B

  // wrap data rows of 8 ways
  // One SRAM (way = 1, single port) is instantiated per DCache way. A read request is
  // sent to all ways; the way_en registered with the request picks the output.
  class DataSRAMBank(index: Int) extends Module {
    val io = IO(new Bundle() {
      // write port: set address, way-enable mask and one row of data
      val w = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Input(UInt(DCacheSRAMRowBits.W))
      }

      // read port: set address and way-enable mask in, selected row out
      val r = new Bundle() {
        val en = Input(Bool())
        val addr = Input(UInt())
        val way_en = Input(UInt(DCacheWays.W))
        val data = Output(UInt(DCacheSRAMRowBits.W))
      }
    })

    // way_en delayed by one cycle, used below to select among the SRAM outputs
    val r_way_en_reg = RegNext(io.r.way_en)

    // multiway data bank
    val data_bank = Array.fill(DCacheWays) {
      Module(new SRAMTemplate(
        Bits(DCacheSRAMRowBits.W),
        set = DCacheSets,
        way = 1,
        shouldReset = false,
        holdRead = false,
        singlePort = true
      ))
    }

    for (w <- 0 until DCacheWays) {
      // a way is written only when the write is enabled and its way_en bit is set
      val wen = io.w.en && io.w.way_en(w)
      data_bank(w).io.w.req.valid := wen
      data_bank(w).io.w.req.bits.apply(
        setIdx = io.w.addr,
        data = io.w.data,
        waymask = 1.U
      )
      // every way is read at the same set index
      data_bank(w).io.r.req.valid := io.r.en
      data_bank(w).io.r.req.bits.apply(setIdx = io.r.addr)
    }

    // Two-level way selection: a one-hot mux inside each half of the ways, then a
    // 2:1 mux between the halves. tail(half) drops the `half` most significant bits
    // and head(half) keeps them, so for an even nWays these are the low-half and
    // high-half way enables respectively.
    val half = nWays / 2
    val data_read = data_bank.map(_.io.r.resp.data(0))
    val data_left = Mux1H(r_way_en_reg.tail(half), data_read.take(half))
    val data_right = Mux1H(r_way_en_reg.head(half), data_read.drop(half))

    // true when any enabled way lies in the low half
    val sel_low = r_way_en_reg.tail(half).orR()
    val row_data = Mux(sel_low, data_left, data_right)

    io.r.data := row_data

    // Debug print of a read. The request fields are delayed with RegNext,
    // presumably to line them up with read data that arrives one cycle later.
    def dump_r() = {
      when(RegNext(io.r.en)) {
        XSDebug("bank read addr %x way_en %x data %x\n",
          RegNext(io.r.addr),
          RegNext(io.r.way_en),
          io.r.data
        )
      }
    }

    // Debug print of a write in the cycle it is issued.
    def dump_w() = {
      when(io.w.en) {
        XSDebug("bank write addr %x way_en %x data %x\n",
          io.w.addr,
          io.w.way_en,
          io.w.data
        )
      }
    }

    def dump() = {
      dump_w()
      dump_r()
    }
  }

  // one DataSRAMBank per bank for data, one DCacheWays-way SRAM per bank for ECC bits
  val data_banks = List.tabulate(DCacheBanks)(i => Module(new DataSRAMBank(i)))
  val ecc_banks = List.fill(DCacheBanks)(Module(new SRAMTemplate(
    Bits(eccBits.W),
    set = DCacheSets,
    way = DCacheWays,
    shouldReset = false,
    holdRead = false,
    singlePort = true
  )))

  data_banks.map(_.dump())

  // per load port: way enable, set index and bank index derived from the request
  val way_en = Wire(Vec(LoadPipelineWidth, io.read(0).bits.way_en.cloneType))
  // NOTE(review): way_en_reg is not referenced anywhere else in this class
  val way_en_reg = RegNext(way_en)
  val set_addrs = Wire(Vec(LoadPipelineWidth, UInt()))
  val bank_addrs = Wire(Vec(LoadPipelineWidth, UInt()))

  // read data_banks and ecc_banks
  // for single port SRAM, do not allow read and write in the same cycle
  val rwhazard = io.write.valid
  val rrhazard = false.B // io.readline.valid
  (0 until LoadPipelineWidth).map(rport_index => {
    set_addrs(rport_index) := addr_to_dcache_set(io.read(rport_index).bits.addr)
    bank_addrs(rport_index) := addr_to_dcache_bank(io.read(rport_index).bits.addr)

    // load reads are back-pressured whenever a write is valid
    io.read(rport_index).ready := !(rwhazard || rrhazard)

    // use way_en to select a way after data read out
    // checked one cycle after the request fires: at most one way may be enabled
    assert(!(RegNext(io.read(rport_index).fire() && PopCount(io.read(rport_index).bits.way_en) > 1.U)))
    way_en(rport_index) := io.read(rport_index).bits.way_en
  })
  // readline is also blocked only by a valid write
  io.readline.ready := !(rwhazard)

  // read each bank, get bank result
  val bank_result = Wire(Vec(DCacheBanks, new L1BankedDataReadResult()))
  dontTouch(bank_result)
  val read_bank_error = Wire(Vec(DCacheBanks, Bool()))
  dontTouch(read_bank_error)
  // both load ports valid and addressing the same bank
  // (ports 0 and 1 are referenced directly here and below)
  val rr_bank_conflict = bank_addrs(0) === bank_addrs(1) && io.read(0).valid && io.read(1).valid
  // load port vs. readline conflicts
  val rrl_bank_conflict_0 = Wire(Bool())
  val rrl_bank_conflict_1 = Wire(Bool())
  if (ReduceReadlineConflict) {
    rrl_bank_conflict_0 := io.read(0).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(0))
    rrl_bank_conflict_1 := io.read(1).valid && io.readline.valid && io.readline.bits.rmask(bank_addrs(1))
  } else {
    rrl_bank_conflict_0 := io.read(0).valid && io.readline.valid
    rrl_bank_conflict_1 := io.read(1).valid && io.readline.valid
  }

  // load port vs. write conflicts
  val rw_bank_conflict_0 = io.read(0).valid && rwhazard
  val rw_bank_conflict_1 = io.read(1).valid && rwhazard
  val perf_multi_read = io.read(0).valid && io.read(1).valid
  // "fast" is the combinational conflict flag, "slow" is the same flag one cycle later.
  // Only port 1 is flagged on a load-load conflict.
  io.bank_conflict_fast(0) := rw_bank_conflict_0 || rrl_bank_conflict_0
  io.bank_conflict_slow(0) := RegNext(io.bank_conflict_fast(0))
  io.bank_conflict_fast(1) := rw_bank_conflict_1 || rrl_bank_conflict_1 || rr_bank_conflict
  io.bank_conflict_slow(1) := RegNext(io.bank_conflict_fast(1))
  XSPerfAccumulate("data_array_multi_read", perf_multi_read)
  XSPerfAccumulate("data_array_rr_bank_conflict", rr_bank_conflict)
  XSPerfAccumulate("data_array_rrl_bank_conflict_0", rrl_bank_conflict_0)
  XSPerfAccumulate("data_array_rrl_bank_conflict_1", rrl_bank_conflict_1)
  XSPerfAccumulate("data_array_rw_bank_conflict_0", rw_bank_conflict_0)
  XSPerfAccumulate("data_array_rw_bank_conflict_1", rw_bank_conflict_1)
  XSPerfAccumulate("data_array_access_total", io.read(0).valid +& io.read(1).valid)
  XSPerfAccumulate("data_array_read_0", io.read(0).valid)
  XSPerfAccumulate("data_array_read_1", io.read(1).valid)
  XSPerfAccumulate("data_array_read_line", io.readline.valid)
  XSPerfAccumulate("data_array_write", io.write.valid)

  for (bank_index <- 0 until DCacheBanks) {
    //     Set Addr & Read Way Mask
    //
    //      Pipe 0      Pipe 1
    //        +           +
    //        |           |
    // +------+-----------+-------+
    //  X                        X
    //   X                      +------+ Bank Addr Match
    //    +---------+----------+
    //              |
    //     +--------+--------+
    //     |    Data Bank    |
    //     +-----------------+
    // per load port: the port is valid and targets this bank
    val bank_addr_matchs = WireInit(VecInit(List.tabulate(LoadPipelineWidth)(i => {
      bank_addrs(i) === bank_index.U && io.read(i).valid
    })))
    val readline_match = Wire(Bool())
    if (ReduceReadlineConflict) {
      readline_match := io.readline.valid && io.readline.bits.rmask(bank_index)
    } else {
      readline_match := io.readline.valid
    }
    // source priority for this bank: readline first, then load port 0, otherwise load port 1
    val bank_way_en = Mux(readline_match,
      io.readline.bits.way_en,
      Mux(bank_addr_matchs(0), way_en(0), way_en(1))
    )
    val bank_set_addr = Mux(readline_match,
      addr_to_dcache_set(io.readline.bits.addr),
      Mux(bank_addr_matchs(0), set_addrs(0), set_addrs(1))
    )

    // read raw data
    val data_bank = data_banks(bank_index)
    data_bank.io.r.en := bank_addr_matchs.asUInt.orR || readline_match
    data_bank.io.r.way_en := bank_way_en
    data_bank.io.r.addr := bank_set_addr
    bank_result(bank_index).raw_data := data_bank.io.r.data

    // read ECC
    // all ways are read; the way enable registered with the request selects one
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.r.req.valid := bank_addr_matchs.asUInt.orR || readline_match
    ecc_bank.io.r.req.bits.apply(setIdx = bank_set_addr)
    bank_result(bank_index).ecc := Mux1H(RegNext(bank_way_en), ecc_bank.io.r.resp.data)

    // use ECC to check error
    // decode is applied to Cat(ecc, raw_data) of this bank
    val data = bank_result(bank_index).asECCData()
    bank_result(bank_index).error := dcacheParameters.dataCode.decode(data).error
    read_bank_error(bank_index) := bank_result(bank_index).error
  }

  // read result: expose banked read result
  io.resp := bank_result

  // error detection
  // normal read ports
  // an error is reported when the port fired in the previous cycle, the bank it
  // addressed then reports an error now, and the port was not flagged as conflicting
  (0 until LoadPipelineWidth).map(rport_index => {
    io.read_error(rport_index) := RegNext(io.read(rport_index).fire()) &&
      read_bank_error(RegNext(bank_addrs(rport_index))) &&
      !io.bank_conflict_slow(rport_index)
  })
  // readline port
  // any bank reporting an error counts, regardless of rmask
  io.readline_error := RegNext(io.readline.fire()) &&
    VecInit((0 until DCacheBanks).map(i => io.resp(i).error)).asUInt().orR

  // write data_banks & ecc_banks
  val sram_waddr = addr_to_dcache_set(io.write.bits.addr)
  for (bank_index <- 0 until DCacheBanks) {
    // data write
    // only banks whose wmask bit is set are written
    val data_bank = data_banks(bank_index)
    data_bank.io.w.en := io.write.valid && io.write.bits.wmask(bank_index)
    data_bank.io.w.way_en := io.write.bits.way_en
    data_bank.io.w.addr := sram_waddr
    data_bank.io.w.data := io.write.bits.data(bank_index)

    // ecc write
    // the stored ECC is the check-bit part of the encoded write data
    val ecc_bank = ecc_banks(bank_index)
    ecc_bank.io.w.req.valid := io.write.valid && io.write.bits.wmask(bank_index)
    ecc_bank.io.w.req.bits.apply(
      setIdx = sram_waddr,
      data = getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
      waymask = io.write.bits.way_en
    )
    when(ecc_bank.io.w.req.valid) {
      XSDebug("write in ecc sram: bank %x set %x data %x waymask %x\n",
        bank_index.U,
        sram_waddr,
        getECCFromEncWord(cacheParams.dataCode.encode((io.write.bits.data(bank_index)))),
        io.write.bits.way_en
      );
    }
  }

  // deal with customized cache op
  // The connections below come after the default ones above, so while a cacheOp
  // request of the matching kind is valid they take over the bank read / write ports.
  // wayNum is truncated to 5 bits below, hence the limit of 32 ways.
  require(nWays <= 32)
  io.cacheOp.resp.bits := DontCare
  val cacheOpShouldResp = WireInit(false.B)
  val eccReadResult = Wire(Vec(DCacheBanks, UInt(eccBits.W)))
  when(io.cacheOp.req.valid){
    // read one way of the indexed set from every data bank
    when (CacheInstrucion.isReadData(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.r.en := true.B
        data_bank.io.r.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.r.addr := io.cacheOp.req.bits.index
      }
      cacheOpShouldResp := true.B
    }
	// read the ECC of the indexed set from every ECC bank
	when (CacheInstrucion.isReadDataECC(io.cacheOp.req.bits.opCode)) {
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
		ecc_bank.io.r.req.valid := true.B
		ecc_bank.io.r.req.bits.setIdx := io.cacheOp.req.bits.index
	  }
	  cacheOpShouldResp := true.B
	}
    // write write_data_vec into one way of the indexed set, one entry per bank
    when(CacheInstrucion.isWriteData(io.cacheOp.req.bits.opCode)){
      for (bank_index <- 0 until DCacheBanks) {
        val data_bank = data_banks(bank_index)
        data_bank.io.w.en := true.B
        data_bank.io.w.way_en := UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        data_bank.io.w.addr := io.cacheOp.req.bits.index
        data_bank.io.w.data := io.cacheOp.req.bits.write_data_vec(bank_index)
      }
      cacheOpShouldResp := true.B
    }
    // write the same write_data_ecc value into one way of every ECC bank
    when(CacheInstrucion.isWriteDataECC(io.cacheOp.req.bits.opCode)){
      for (bank_index <- 0 until DCacheBanks) {
        val ecc_bank = ecc_banks(bank_index)
        ecc_bank.io.w.req.valid := true.B
        ecc_bank.io.w.req.bits.apply(
          setIdx = io.cacheOp.req.bits.index,
          data = io.cacheOp.req.bits.write_data_ecc,
          waymask = UIntToOH(io.cacheOp.req.bits.wayNum(4, 0))
        )
      }
      cacheOpShouldResp := true.B
    }
  }
  // the response is flagged one cycle after a request that matched one of the ops above
  io.cacheOp.resp.valid := RegNext(io.cacheOp.req.valid && cacheOpShouldResp)
  for (bank_index <- 0 until DCacheBanks) {
    io.cacheOp.resp.bits.read_data_vec(bank_index) := bank_result(bank_index).raw_data
	// ECC of the way requested in the previous cycle, for this bank
	eccReadResult(bank_index) := ecc_banks(bank_index).io.r.resp.data(RegNext(io.cacheOp.req.bits.wayNum(4, 0)))
  }
  // only the bank selected by the previous cycle's bank_num is returned; zero when no response
  io.cacheOp.resp.bits.read_data_ecc := Mux(io.cacheOp.resp.valid,
    eccReadResult(RegNext(io.cacheOp.req.bits.bank_num)),
    0.U
  )
}
438