/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import coupledL2.{MemBackTypeMM, MemBackTypeMMField, MemPageTypeNC, MemPageTypeNCField}
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}

class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  val resp_nderr = Bool()

  /* NOTE: if the internal forward logic is supported, these can be uncommented */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    memBackTypeMM := x.memBackTypeMM
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied || x.corrupt
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  def toUncacheWordResp(): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

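/* The methods above are driven by UncacheImp further below. A minimal sketch of an
 * entry's lifecycle (method calls only; the surrounding control logic lives in UncacheImp):
 *   entry.set(req.bits)                     // allocate from an LSQ UncacheWordReq
 *   entry.update(mem_grant.bits)            // latch read data / error bits from the TileLink D channel
 *   resp.bits := entry.toUncacheWordResp()  // hand the result back to the LSQ
 */
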
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "A response was received for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// convert DCacheIO to TileLink
// for now, we only deal with TL-UL

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    )),
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UInt, y: UInt): Bool = x(PAddrBits - 1, 3) === y(PAddrBits - 1, 3)
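  /* All of the addrMatch overloads above compare bits (PAddrBits-1, 3), i.e. two requests are
   * considered "the same block" at an aligned 8-byte granularity. A small worked example
   * (the addresses are illustrative only):
   *   0x8000_1008 and 0x8000_100c  -> match     (same aligned doubleword)
   *   0x8000_1008 and 0x8000_1010  -> no match
   */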

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f1_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
      1. same addr
      2. same cmd
      3. valid
    FIXME lyq: not merged for now due to the following issues
      1. loads can't be merged
      2. how to merge stores and still respond precisely
  */

  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits
  /**
    TODO lyq: block, wait or forward?
    NOW: strictly block on any same-address entry; otherwise an exhaustive case analysis is needed:
      - ld->ld wait
      - ld->st forward
      - st->ld forward
      - st->st block
  */
  val e0_existSame = sizeMap(j => e0_req_valid && states(j).isValid() && addrMatch(e0_req, entries(j))).asUInt.orR
  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
  req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // judge whether to wait on the same block: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req_valid && states(j).isValid() && states(j).isInflight() && addrMatch(e0_req, entries(j))
    )
    val waitQ0 = q0_canSent && addrMatch(e0_req, q0_entry)
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" are not a continuous pipeline,
   *      because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

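  /* A worked example for the A-channel size encoding computed below (assuming an
   * 8-byte data path, i.e. DataBytes = 8):
   *   q0_entry.mask = 0b0000_1111  ->  PopCount = 4  ->  lgSize = 2  (4-byte Get/Put)
   *   q0_entry.mask = 0b1111_1111  ->  PopCount = 8  ->  lgSize = 3  (8-byte Get/Put)
   * A mask whose popcount is not 1/2/4/8 has no legal lgSize; the
   * assert(!(q0_canSent && !legal)) below guards against that case.
   */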
  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size === m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource = q0_canSentIdx,
    toAddress = q0_entry.addr,
    lgSize = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource = q0_canSentIdx,
    toAddress = q0_entry.addr,
    lgSize = lgSize,
    data = q0_entry.data,
    mask = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // once q0 is sent, entries matching its block must wait for it
    (0 until UncacheBufferSize).map(j =>
      when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // clear the wait-same state of entries waiting on this block
    (0 until UncacheBufferSize).map(j =>
      when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   *  1. when io.flush.valid is true: drain the store queue and the ubuffer
   *  2. when io.lsq.req.bits.atomic is true: not supported for now
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty

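  /* A minimal, hypothetical sketch of how the flush handshake above is meant to be
   * driven from outside this module (the left-hand names are illustrative, not from this file):
   *   uncache.io.flush.valid := needDrainUncache      // request: stop and drain the buffer
   *   val uncacheDrained = uncache.io.flush.empty     // all entries have been sent and returned
   */
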
  /******************************************************************
   * Load Data Forward
   *
   *  0. ld in ldu pipeline
   *      f0: vaddr match, mask & data select, fast resp
   *      f1: paddr match, resp
   *
   *  1. ld in buffer (in "Enter Buffer")
   *      ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *      st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *      NOW: strictly blocked on a same-address entry, so this forward path does not exist.
   *
   ******************************************************************/

  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    // f0 vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_validTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid)
    // f0 select
    val f0_fwdMask = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    // f1 paddr match
    val f1_fwdMask = RegEnable(f0_fwdMask, f0_fwdValid)
    val f1_fwdData = RegEnable(f0_fwdData, f0_fwdValid)
    // forward.paddr comes from the dtlb, which is physically far from the uncache buffer
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), RegEnable(forward.paddr, f0_fwdValid)))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    when(f1_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        f1_ptagMatches.asUInt,
        RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
        RegEnable(forward.vaddr, f0_fwdValid),
        RegEnable(forward.paddr, f0_fwdValid)
      )
    }
    // f1 output
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }

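  /* Timing summary of the forward path above, as a sketch (stage names relative to the
   * load unit's query):
   *   f0: match on vaddr, select mask/data with Mux1H, drive forwardMaskFast (fast response)
   *   f1: re-check the same entries against the paddr from the dtlb; drive forwardMask/forwardData
   *   on a vaddr/paddr disagreement: raise matchInvalid and f1_needDrain, which forces
   *   do_uarch_drain so the buffer empties before new requests are accepted (see req_ready).
   */
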

  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purposes
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  // End
}
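
// A minimal, hypothetical integration sketch (names such as l2xbar, hartId and
// enableUncacheOutstanding are illustrative, not taken from this file):
//   val uncache = LazyModule(new Uncache())
//   l2xbar := TLBuffer() := uncache.clientNode          // diplomacy: attach the TL-UL master port
//   ...
//   uncache.module.io.hartId := hartId
//   uncache.module.io.enableOutstanding := enableUncacheOutstanding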