/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
import xiangshan.mem._
import xiangshan.backend.rob.RobPtr

/** Load queue entry payload without the physical address:
  * an 8-bit mask, XLEN bits of data and a per-bit (8 wide) forward mask.
  */
class LQDataEntryWoPaddr(implicit p: Parameters) extends XSBundle {
  val mask = UInt(8.W)
  val data = UInt(XLEN.W)
  val fwdMask = Vec(8, Bool())
}

/** [[LQDataEntryWoPaddr]] extended with a PAddrBits wide physical address. */
class LQDataEntry(implicit p: Parameters) extends LQDataEntryWoPaddr {
  val paddr = UInt(PAddrBits.W)
}

// Data module define
// These data modules are like SyncDataModuleTemplate, but support cam-like ops

/** Load queue paddr module.
  *
  * Holds `numEntries` physical addresses in registers and supports 2 cam sources:
  *   - st-ld violation addr cam (match on the bits above `DCacheWordOffset`)
  *   - data release addr cam (match on the bits above `DCacheLineOffset`)
  *
  * Reads use a registered read address. Writes are split into `numWBanks` banks and
  * take one extra register stage (s0: decode + latch per bank, s1: update entries).
  *
  * @param numEntries number of address entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  * @param numWBanks  number of write banks, must be a power of 2 and at least 2
  */
class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int, numWBanks: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // normal read/write ports
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt((PAddrBits).W)))
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
    // violation cam: hit if addr is in the same word
    val violationMdata = Input(Vec(StorePipelineWidth, UInt((PAddrBits).W))) // addr
    val violationMmask = Output(Vec(StorePipelineWidth, Vec(numEntries, Bool()))) // cam result mask
    // release cam: hit if addr is in the same cacheline
    val releaseMdata = Input(Vec(LoadPipelineWidth, UInt((PAddrBits).W)))
    val releaseMmask = Output(Vec(LoadPipelineWidth, Vec(numEntries, Bool())))
  })

  require(isPow2(numWBanks))
  require(numWBanks >= 2)

  // NOTE(review): integer division. If numEntries is not a multiple of numWBanks, the
  // entries at index >= numWBanks * numEntryPerBank are never written by the bank loop
  // below -- confirm that callers always pass a multiple.
  val numEntryPerBank = numEntries / numWBanks

  val data = Reg(Vec(numEntries, UInt((PAddrBits).W)))

  // read ports: the read address is registered before indexing the entry array
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // write ports
  val waddr_dec = io.waddr.map(a => UIntToOH(a))

  /** Slice of a one-hot entry vector that belongs to write bank `bank`. */
  def selectBankMask(in: UInt, bank: Int): UInt = {
    in((bank + 1) * numEntryPerBank - 1, bank * numEntryPerBank)
  }

  for (bank <- 0 until numWBanks) {
    // write ports
    // s0: write to bank level buffer
    val s0_bank_waddr_dec = waddr_dec.map(dec => selectBankMask(dec, bank))
    val s0_bank_write_en = io.wen.zip(s0_bank_waddr_dec).map { case (en, dec) => en && dec.orR }
    s0_bank_waddr_dec.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s0_bank_waddr_dec${bank}_${port}")
    }
    s0_bank_write_en.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s0_bank_write_en${bank}_${port}")
    }

    // s1: write data to entries
    // The decoded address and the write data are only latched when the port hits this bank.
    val s1_bank_waddr_dec = s0_bank_waddr_dec.zip(s0_bank_write_en).map { case (dec, en) => RegEnable(dec, en) }
    val s1_bank_wen = RegNext(VecInit(s0_bank_write_en))
    val s1_wdata = io.wdata.zip(s0_bank_write_en).map { case (wdata, en) => RegEnable(wdata, en) }
    s1_bank_waddr_dec.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_bank_waddr_dec${bank}_${port}")
    }
    s1_bank_wen.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_bank_wen${bank}_${port}")
    }
    s1_wdata.zipWithIndex.foreach { case (sig, port) =>
      sig.suggestName(s"s1_wdata${bank}_${port}")
    }

    // entry write
    for (entry <- 0 until numEntryPerBank) {
      // write ports: one enable per write port, at most one is expected to be set
      // (see the write conflict assertion at the end of this module)
      val s1_entry_write_en_vec = s1_bank_wen.zip(s1_bank_waddr_dec).map { case (en, dec) => en && dec(entry) }
      val s1_entry_write_en = VecInit(s1_entry_write_en_vec).asUInt.orR
      val s1_entry_write_data = Mux1H(s1_entry_write_en_vec, s1_wdata)
      when (s1_entry_write_en) {
        data(bank * numEntryPerBank + entry) := s1_entry_write_data
      }
      s1_entry_write_en_vec.zipWithIndex.foreach { case (sig, port) =>
        sig.suggestName(s"s1_entry_write_en_vec${bank}_${entry}_${port}")
      }
      s1_entry_write_en.suggestName(s"s1_entry_write_en${bank}_${entry}")
      s1_entry_write_data.suggestName(s"s1_entry_write_data${bank}_${entry}")
    }
  }

  // content addressed match
  // violation cam: compare all address bits above the word offset
  for (i <- 0 until StorePipelineWidth) {
    for (j <- 0 until numEntries) {
      io.violationMmask(i)(j) := io.violationMdata(i)(PAddrBits-1, DCacheWordOffset) === data(j)(PAddrBits-1, DCacheWordOffset)
    }
  }
  // release cam: compare all address bits above the cacheline offset
  for (i <- 0 until LoadPipelineWidth) {
    for (j <- 0 until numEntries) {
      io.releaseMmask(i)(j) := io.releaseMdata(i)(PAddrBits-1, DCacheLineOffset) === data(j)(PAddrBits-1, DCacheLineOffset)
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}

/** Load queue load mask module.
  *
  * Holds one 8-bit mask per entry. Reads use a registered read address, writes update
  * the addressed entry directly. The violation cam reports, per store pipeline and per
  * entry, whether the input mask overlaps the stored mask in at least one bit.
  *
  * @param numEntries number of mask entries
  * @param numRead    number of read ports
  * @param numWrite   number of write ports
  */
class LQMaskModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(8.W)))
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(8.W)))
    // st-ld violation check wmask compare
    val violationMdata = Input(Vec(StorePipelineWidth, UInt(8.W))) // input 8-bit wmask
    val violationMmask = Output(Vec(StorePipelineWidth, Vec(numEntries, Bool()))) // output wmask overlap vector
  })

  val data = Reg(Vec(numEntries, UInt(8.W)))

  // read ports: the read address is registered before indexing the entry array
  for (i <- 0 until numRead) {
    io.rdata(i) := data(RegNext(io.raddr(i)))
  }

  // write ports
  val waddr_dec = io.waddr.map(a => UIntToOH(a))
  for (j <- 0 until numEntries) {
    // per write port: does this port write entry j?
    val write_wen = io.wen.zip(waddr_dec).map { case (en, dec) => en && dec(j) }
    when (VecInit(write_wen).asUInt.orR) {
      data(j) := Mux1H(write_wen, io.wdata)
    }
  }

  // st-ld violation check wmask compare
  for (i <- 0 until StorePipelineWidth) {
    for (j <- 0 until numEntries) {
      io.violationMmask(i)(j) := (io.violationMdata(i) & data(j)).orR
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}

/** LQDataModule is a wrapper of 8 byte-wide MaskedBankedSyncDataModuleTemplates.
  *
  * NOTE: LQDataModule is not instantiated by any module in this file.
  *
  * It also contains:
  *   - fwdMask, which is used to merge refill data and forwarded data
  *   - word index extracted from paddr, which is used to select data from refill data (a cacheline)
  *
  * @param numEntries number of data entries
  * @param numRead    number of read ports
  * @param numWrite   number of address indexed write ports
  */
class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle {
    // sync read
    val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
    val rdata = Output(Vec(numRead, UInt(XLEN.W)))

    // address indexed write
    val wen   = Input(Vec(numWrite, Bool()))
    val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
    val wdata = Input(Vec(numWrite, UInt(XLEN.W)))
    // forward mask needs to be recorded to merge data
    val fwdMaskWdata = Input(Vec(numWrite, UInt(8.W)))
    // refillOffBits - wordOffBits bits in paddr need to be stored in LQDataModule for refilling
    val paddrWdata = Input(Vec(numWrite, UInt((PAddrBits).W)))

    // masked write
    val mwmask = Input(Vec(numEntries, Bool()))
    val refillData = Input(UInt(l1BusDataWidth.W))
  })

  // one data module per byte lane; masked write port j carries refill word j
  val data8 = Seq.fill(8)(Module(new MaskedBankedSyncDataModuleTemplate(
    UInt(8.W), numEntries, numRead, numWrite, numMWrite = refillWords, numWBanks = LoadQueueNWriteBanks
  )))
  val fwdMask = Reg(Vec(numEntries, UInt(8.W)))
  val wordIndex = Reg(Vec(numEntries, UInt((refillOffBits - wordOffBits).W)))

  // read ports
  for (i <- 0 until numRead) {
    for (j <- 0 until 8) {
      data8(j).io.raddr(i) := io.raddr(i)
    }
    // byte lane 0 ends up in the least significant byte of rdata
    io.rdata(i) := VecInit((0 until 8).map(j => data8(j).io.rdata(i))).asUInt
  }

  // below is the write ports (with priorities)
  for (i <- 0 until numWrite) {
    // write to data8
    for (j <- 0 until 8) {
      data8(j).io.waddr(i) := io.waddr(i)
      data8(j).io.wdata(i) := io.wdata(i)(8*(j+1)-1, 8*j)
      data8(j).io.wen(i) := io.wen(i)
    }

    // write ctrl info
    // TODO: optimize that
    when (io.wen(i)) {
      fwdMask(io.waddr(i)) := io.fwdMaskWdata(i)
      // get_word is not defined in this file; presumably it extracts the word index
      // inside a refill block from the paddr -- confirm against HasDCacheParameters
      wordIndex(io.waddr(i)) := get_word(io.paddrWdata(i))
    }
  }

  // write refilled data to data8

  // select refill data
  // split dcache result into words
  val words = VecInit((0 until refillWords) map { i => io.refillData(DataBits * (i + 1) - 1, DataBits * i)})
  // select refill data according to wordIndex (paddr)
  for (i <- 0 until 8) {
    for (j <- 0 until refillWords) {
      data8(i).io.mwdata(j) := words(j)(8*(i+1)-1, 8*i)
    }
  }

  // gen refill wmask
  // entry k takes byte i of refill word j iff the entry selects word j, the entry is
  // being refilled (io.mwmask) and byte i was not marked in its forward mask
  for (j <- 0 until refillWords) {
    for (k <- 0 until numEntries) {
      val wordMatch = wordIndex(k) === j.U
      for (i <- 0 until 8) {
        data8(i).io.mwmask(j)(k) := wordMatch && io.mwmask(k) && !fwdMask(k)(i)
      }
    }
  }

  // DataModuleTemplate should not be used when there're any write conflicts
  for (i <- 0 until numWrite) {
    for (j <- i+1 until numWrite) {
      assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
    }
  }
}

/** LoadQueueDataWrapper wraps:
  *   - load queue paddrModule
  *   - load queue maskModule
  * and their interconnect
  *
  * @param size       number of load queue entries
  * @param wbNumWrite number of write ports exposed on `io.paddr` and `io.wb`
  */
class LoadQueueDataWrapper(size: Int, wbNumWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val paddr = new Bundle() {
      val wen = Vec(wbNumWrite, Input(Bool()))
      val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
      val wdata = Input(Vec(wbNumWrite, UInt(PAddrBits.W)))
    }
    val wb = new Bundle() {
      val wen = Vec(wbNumWrite, Input(Bool()))
      val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
      val wdata = Input(Vec(wbNumWrite, UInt(8.W)))
    }
    val uncache = new Bundle() {
      val raddr = Input(UInt(log2Up(size).W))
      val rdata = Output(new LQDataEntry)
    }
    // st-ld violation query, word level cam
    val violation = Vec(StorePipelineWidth, new Bundle() {
      val paddr = Input(UInt(PAddrBits.W))
      val mask = Input(UInt(8.W))
      val violationMask = Output(Vec(size, Bool()))
    })
    // ld-ld violation query, cache line level cam
    val release_violation = Vec(LoadPipelineWidth, new Bundle() {
      val paddr = Input(UInt(PAddrBits.W))
      val match_mask = Output(Vec(size, Bool()))
      // if ld-ld violation does happened, we replay from the elder load
    })
    val debug = Output(Vec(size, new LQDataEntry))

    /** Drive address and mask data of writeback channel `channel`.
      *
      * Only `waddr` and `wdata` are connected here; the caller still has to drive
      * `this.wb.wen(channel)` itself.
      *
      * @param channel write channel index, must be in `[0, wbNumWrite)`
      * @param waddr   entry index to write
      * @param wdata   8-bit mask to write
      */
    def wbWrite(channel: Int, waddr: UInt, wdata: UInt): Unit = {
      // Check the channel index itself on both sides; the previous condition tested
      // `wbNumWrite >= 0`, which let a negative channel pass the precondition.
      require(channel >= 0 && channel < wbNumWrite)
      // need extra "this.wb.wen(channel) := true.B"
      this.wb.waddr(channel) := waddr
      this.wb.wdata(channel) := wdata
    }
  })

  // data module
  // NOTE(review): both modules are built with LoadPipelineWidth write ports while the
  // wiring below iterates over wbNumWrite ports, so wbNumWrite is presumably expected
  // to equal LoadPipelineWidth -- confirm against the instantiating module.
  val paddrModule = Module(new LQPaddrModule(size, numRead = 1, numWrite = LoadPipelineWidth, numWBanks = LoadQueueNWriteBanks))
  val maskModule = Module(new LQMaskModule(size, numRead = 1, numWrite = LoadPipelineWidth))

  // read port for uncache
  paddrModule.io.raddr(0) := io.uncache.raddr
  maskModule.io.raddr(0) := io.uncache.raddr

  io.uncache.rdata.paddr := paddrModule.io.rdata(0)
  io.uncache.rdata.mask := maskModule.io.rdata(0)
  // data and fwdMask are not stored by this wrapper
  io.uncache.rdata.data := DontCare
  io.uncache.rdata.fwdMask := DontCare

  // write mask and paddr
  // write port 0 -> wbNumWrite-1
  (0 until wbNumWrite).foreach { i =>
    maskModule.io.wen(i) := io.wb.wen(i)
    maskModule.io.waddr(i) := io.wb.waddr(i)
    maskModule.io.wdata(i) := io.wb.wdata(i)

    paddrModule.io.wen(i) := io.paddr.wen(i)
    paddrModule.io.waddr(i) := io.paddr.waddr(i)
    paddrModule.io.wdata(i) := io.paddr.wdata(i)
  }

  // st-ld mem access violation check, gen violationMask
  // an entry is reported only if both the address cam and the mask overlap check hit
  (0 until StorePipelineWidth).foreach { i =>
    paddrModule.io.violationMdata(i) := io.violation(i).paddr
    maskModule.io.violationMdata(i) := io.violation(i).mask
    io.violation(i).violationMask := (paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt).asBools
  }

  // ld-ld mem access violation check, gen violationMask (cam match mask)
  (0 until LoadPipelineWidth).foreach { i =>
    paddrModule.io.releaseMdata(i) := io.release_violation(i).paddr
    io.release_violation(i).match_mask := paddrModule.io.releaseMmask(i)
  }

  // debug data read
  io.debug := DontCare
}