/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2

* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Binh Pham, Viswanathan Vaidyanathan, Aamer Jaleel, and Abhishek Bhattacharjee. "[Colt: Coalesced large-reach
* tlbs.](https://doi.org/10.1109/MICRO.2012.32)" 45th Annual IEEE/ACM International Symposium on Microarchitecture
* (MICRO). 2012.
***************************************************************************************/

package xiangshan.cache.mmu

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import difftest._
import freechips.rocketchip.util.SRAMAnnotation
import xiangshan._
import utils._
import utility._
import xiangshan.backend.fu.{PMPChecker, PMPReqBundle, PMPConfig => XSPMPConfig}
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.fu.util.HasCSRConst
import freechips.rocketchip.rocket.PMPConfig

/** TLB module
  * support block request and non-block request io at the same time
  * return paddr at next cycle, then go for pmp/pma check
  * @param Width: The number of requestors
  * @param nRespDups: The number of duplicated copies of paddr/gpaddr/pbmt/excp in each response
  * @param Block: Blocked or not for each requestor ports
  * @param q: TLB Parameters, like entry number, each TLB has its own parameters
  * @param p: XiangShan Parameters, like XLEN
  */

class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)(implicit p: Parameters) extends TlbModule
  with HasCSRConst
  with HasPerfEvents
{
  val io = IO(new TlbIO(Width, nRespDups, q))

  val req = io.requestor.map(_.req)
  val resp = io.requestor.map(_.resp)
  val ptw = io.ptw
  val pmp = io.pmp
  val refill_to_mem = io.refill_to_mem

  /** Sfence.vma & Svinval
    * Sfence.vma will 1. flush old entries 2. flush inflight 3. flush pipe
    * Svinval will 1. flush old entries 2. flush inflight
    * So, Svinval will not flush pipe, which means
    * it should not drop reqs from pipe and should return right resp
    */
  val sfence = DelayN(io.sfence, q.fenceDelay)
  val csr = DelayN(io.csr, q.fenceDelay)

  val flush_mmu = sfence.valid || csr.satp.changed || csr.vsatp.changed || csr.hgatp.changed
  val mmu_flush_pipe = sfence.valid && sfence.bits.flushPipe // for svinval, won't flush pipe
  val flush_pipe = io.flushPipe
  val redirect = io.redirect
  val EffectiveVa = Wire(Vec(Width, UInt(XLEN.W)))
  val req_in = req
  // req_out latches the request (with fullva replaced by the effective va) when it fires,
  // so that the response stage one cycle later can refer to it.
  val req_out = Reg(Vec(Width, new TlbReq))
  for (i <- 0 until Width) {
    when (req(i).fire) {
      req_out(i) := req(i).bits
      req_out(i).fullva := EffectiveVa(i)
    }
  }
  val req_out_v = (0 until Width).map(i => ValidHold(req_in(i).fire && !req_in(i).bits.kill, resp(i).fire, flush_pipe(i)))

  val isHyperInst = (0 until Width).map(i => req_out_v(i) && req_out(i).hyperinst)

  /** Select the translation mode (noS2xlate / allStage / onlyStage2 / onlyStage1) from the delayed
    * vsatp/hgatp modes.
    *
    * @param inGuest true when the access must be translated as a guest access; each call site
    *                supplies its own condition (virtualization mode and/or hypervisor instruction)
    * @return noS2xlate when `inGuest` is false; otherwise allStage when both vsatp.mode and hgatp.mode
    *         are non-zero, onlyStage2 when vsatp.mode is zero, onlyStage1 when hgatp.mode is zero
    */
  private def genS2xlateMode(inGuest: Bool): UInt = MuxCase(noS2xlate, Seq(
    (!inGuest) -> noS2xlate,
    (csr.vsatp.mode =/= 0.U && csr.hgatp.mode =/= 0.U) -> allStage,
    (csr.vsatp.mode === 0.U) -> onlyStage2,
    (csr.hgatp.mode === 0.U) -> onlyStage1
  ))

  // ATTENTION: csr and flush from backend are delayed. csr should not be later than flush.
  // because, csr will influence tlb behavior.
  val ifetch = q.fetchi.B
  val mode_tmp = if (q.useDmode) csr.priv.dmode else csr.priv.imode
  val mode = (0 until Width).map(i => Mux(isHyperInst(i), csr.priv.spvp, mode_tmp))
  val virt_in = csr.priv.virt
  val virt_out = req.map(a => RegEnable(csr.priv.virt, a.fire))
  val sum = (0 until Width).map(i => Mux(virt_out(i) || isHyperInst(i), csr.priv.vsum, csr.priv.sum))
  val mxr = (0 until Width).map(i => Mux(virt_out(i) || isHyperInst(i), csr.priv.vmxr || csr.priv.mxr, csr.priv.mxr))
  // translation mode seen by the incoming request (request cycle) and by the latched request (response cycle)
  val req_in_s2xlate = (0 until Width).map(i => genS2xlateMode(virt_in || req_in(i).bits.hyperinst))
  val req_out_s2xlate = (0 until Width).map(i => genS2xlateMode(virt_out(i) || isHyperInst(i)))
  // state shared by all ports for fetching the guest physical address after a guest page fault hit
  val need_gpa = RegInit(false.B)
  val need_gpa_wire = WireInit(false.B)
  val need_gpa_robidx = Reg(new RobPtr)
  val need_gpa_vpn = Reg(UInt(vpnLen.W))
  val resp_gpa_gvpn = Reg(UInt(ptePPNLen.W))
  val resp_gpa_refill = RegInit(false.B)
  val resp_s1_level = RegInit(0.U(log2Up(Level + 1).W))
  val resp_s1_isLeaf = RegInit(false.B)
  val resp_s1_isFakePte = RegInit(false.B)
  val hasGpf = Wire(Vec(Width, Bool()))

  val Sv39Enable = csr.satp.mode === 8.U
  val Sv48Enable = csr.satp.mode === 9.U
  val Sv39vsEnable = csr.vsatp.mode === 8.U
  val Sv48vsEnable = csr.vsatp.mode === 9.U
  val Sv39x4Enable = csr.hgatp.mode === 8.U
  val Sv48x4Enable = csr.hgatp.mode === 9.U

  val vmEnable = (0 until Width).map(i => !(isHyperInst(i) || virt_out(i)) && (
    if (EnbaleTlbDebug) (Sv39Enable || Sv48Enable)
    else (Sv39Enable || Sv48Enable) && (mode(i) < ModeM))
  )
  val s2xlateEnable = (0 until Width).map(i =>
    (isHyperInst(i) || virt_out(i)) &&
    (Sv39vsEnable || Sv48vsEnable || Sv39x4Enable || Sv48x4Enable) &&
    (mode(i) < ModeM)
  )
  val portTranslateEnable = (0 until Width).map(i => (vmEnable(i) || s2xlateEnable(i)) && RegEnable(!req(i).bits.no_translate, req(i).valid))

  // pre fault: check fault before real do translate
  val prepf = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val pregpf = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val preaf = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val premode = (0 until Width).map(i => Mux(req_in(i).bits.hyperinst, csr.priv.spvp, mode_tmp))
  for (i <- 0 until Width) {
    resp(i).bits.fullva := RegEnable(EffectiveVa(i), req(i).valid)
  }
  val prevmEnable = (0 until Width).map(i => !(virt_in || req_in(i).bits.hyperinst) && (
    if (EnbaleTlbDebug) (Sv39Enable || Sv48Enable)
    else (Sv39Enable || Sv48Enable) && (premode(i) < ModeM))
  )
  val pres2xlateEnable = (0 until Width).map(i =>
    (virt_in || req_in(i).bits.hyperinst) &&
    (Sv39vsEnable || Sv48vsEnable || Sv39x4Enable || Sv48x4Enable) &&
    (premode(i) < ModeM)
  )

  (0 until Width).foreach{i =>

    // pmm selects how many upper address bits are masked off (PMLEN7 / PMLEN16 / none)
    val pmm = WireInit(0.U(2.W))

    when (ifetch || req(i).bits.hlvx) {
      pmm := 0.U
    } .elsewhen (premode(i) === ModeM) {
      pmm := csr.pmm.mseccfg
    } .elsewhen (!(virt_in || req_in(i).bits.hyperinst) && premode(i) === ModeS) {
      pmm := csr.pmm.menvcfg
    } .elsewhen ((virt_in || req_in(i).bits.hyperinst) && premode(i) === ModeS) {
      pmm := csr.pmm.henvcfg
    } .elsewhen (req_in(i).bits.hyperinst && csr.priv.imode === ModeU) {
      pmm := csr.pmm.hstatus
    } .elsewhen (premode(i) === ModeU) {
      pmm := csr.pmm.senvcfg
    }

    // masked bits are sign-extended when (VS-)stage-1 translation is active, zero-extended otherwise
    when (prevmEnable(i) || (pres2xlateEnable(i) && csr.vsatp.mode =/= 0.U)) {
      when (pmm === PMLEN7) {
        EffectiveVa(i) := SignExt(req_in(i).bits.fullva(56, 0), XLEN)
      } .elsewhen (pmm === PMLEN16) {
        EffectiveVa(i) := SignExt(req_in(i).bits.fullva(47, 0), XLEN)
      } .otherwise {
        EffectiveVa(i) := req_in(i).bits.fullva
      }
    } .otherwise {
      when (pmm === PMLEN7) {
        EffectiveVa(i) := ZeroExt(req_in(i).bits.fullva(56, 0), XLEN)
      } .elsewhen (pmm === PMLEN16) {
        EffectiveVa(i) := ZeroExt(req_in(i).bits.fullva(47, 0), XLEN)
      } .otherwise {
        EffectiveVa(i) := req_in(i).bits.fullva
      }
    }

    // high-address checks on the effective va, evaluated in the request cycle
    val pf48 = SignExt(EffectiveVa(i)(47, 0), XLEN) =/= EffectiveVa(i)
    val pf39 = SignExt(EffectiveVa(i)(38, 0), XLEN) =/= EffectiveVa(i)
    val gpf48 = EffectiveVa(i)(XLEN - 1, 48 + 2) =/= 0.U
    val gpf39 = EffectiveVa(i)(XLEN - 1, 39 + 2) =/= 0.U
    val af = EffectiveVa(i)(XLEN - 1, PAddrBits) =/= 0.U
    when (req(i).valid && req(i).bits.checkfullva) {
      when (prevmEnable(i) || pres2xlateEnable(i)) {
        when (req_in_s2xlate(i) === onlyStage2) {
          when (Sv48x4Enable) {
            pregpf(i) := gpf48
          } .elsewhen (Sv39x4Enable) {
            pregpf(i) := gpf39
          }
        } .elsewhen (req_in_s2xlate(i) === onlyStage1 || req_in_s2xlate(i) === allStage) {
          when (Sv48vsEnable) {
            prepf(i) := pf48
          } .elsewhen (Sv39vsEnable) {
            prepf(i) := pf39
          }
        } .otherwise { // noS2xlate
          when (Sv48Enable) {
            prepf(i) := pf48
          } .elsewhen (Sv39Enable) {
            prepf(i) := pf39
          }
        }
      } .otherwise {
        preaf(i) := af
      }
    }
  }

  val refill = ptw.resp.fire && !(ptw.resp.bits.getGpa) && !need_gpa && !need_gpa_wire && !flush_mmu
  // prevent ptw refill when: 1) it's a getGpa request; 2) l1tlb is in need_gpa state; 3) mmu is being flushed.

  refill_to_mem := DontCare
  val entries = Module(new TlbStorageWrapper(Width, q, nRespDups))
  entries.io.base_connect(sfence, csr, csr.satp)
  if (q.outReplace) { io.replace <> entries.io.replace }
  for (i <- 0 until Width) {
    entries.io.r_req_apply(io.requestor(i).req.valid, get_pn(req_in(i).bits.vaddr), i, req_in_s2xlate(i))
    entries.io.w_apply(refill, ptw.resp.bits)
    // TODO: RegNext enable:req.valid
    resp(i).bits.debug.isFirstIssue := RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid)
    resp(i).bits.debug.robIdx := RegEnable(req(i).bits.debug.robIdx, req(i).valid)
  }

  // read TLB, get hit/miss, paddr, perm bits
  val readResult = (0 until Width).map(TLBRead(_))
  val hitVec = readResult.map(_._1)
  val missVec = readResult.map(_._2)
  val pmp_addr = readResult.map(_._3)
  val perm = readResult.map(_._4)
  val g_perm = readResult.map(_._5)
  val pbmt = readResult.map(_._6)
  val g_pbmt = readResult.map(_._7)
  // check pmp use paddr (for timing optimization, use pmp_addr here)
  // check permission
  (0 until Width).foreach{i =>
    val noTranslateReg = RegNext(req(i).bits.no_translate)
    val addr = Mux(noTranslateReg, req(i).bits.pmp_addr, pmp_addr(i))
    pmp_check(addr, req_out(i).size, req_out(i).cmd, noTranslateReg, i)
    for (d <- 0 until nRespDups) {
      pbmt_check(i, d, pbmt(i)(d), g_pbmt(i)(d), req_out_s2xlate(i))
      perm_check(perm(i)(d), req_out(i).cmd, i, d, g_perm(i)(d), req_out(i).hlvx, req_out_s2xlate(i), prepf(i), pregpf(i), preaf(i))
    }
    hasGpf(i) := hitVec(i) && (resp(i).bits.excp(0).gpf.ld || resp(i).bits.excp(0).gpf.st || resp(i).bits.excp(0).gpf.instr)
  }

  // handle block or non-block io
  // for non-block io, just return the above result, send miss to ptw
  // for block io, hold the request, send miss to ptw,
  //   when ptw back, return the result
  (0 until Width) foreach {i =>
    if (Block(i)) handle_block(i)
    else handle_nonblock(i)
  }
  io.ptw.resp.ready := true.B

  /************************  main body above | method/log/perf below ****************************/

  /** Build the read path of requestor port `i`: combine the storage lookup result with the bypassed
    * ptw response, maintain the shared need_gpa state, and drive miss/paddr/gpaddr of the response.
    *
    * @param i requestor port index
    * @return (hit, miss, paddr of dup 0 used for the pmp check, perm, g_perm, pbmt, g_pbmt);
    *         the last four are Vecs with one element per response duplicate
    */
  def TLBRead(i: Int) = {
    val (e_hit, e_ppn, e_perm, e_g_perm, e_s2xlate, e_pbmt, e_g_pbmt) = entries.io.r_resp_apply(i)
    val (p_hit, p_ppn, p_pbmt, p_perm, p_gvpn, p_g_pbmt, p_g_perm, p_s2xlate, p_s1_level, p_s1_isLeaf, p_s1_isFakePte) = ptw_resp_bypass(get_pn(req_in(i).bits.vaddr), req_in_s2xlate(i))
    val enable = portTranslateEnable(i)
    val isOnlys2xlate = req_out_s2xlate(i) === onlyStage2
    val need_gpa_vpn_hit = need_gpa_vpn === get_pn(req_out(i).vaddr)
    val isitlb = TlbCmd.isExec(req_out(i).cmd)
    val isPrefetch = req_out(i).isPrefetch
    val currentRedirect = req_out(i).debug.robIdx.needFlush(redirect)
    val lastCycleRedirect = req_out(i).debug.robIdx.needFlush(RegNext(redirect))

    // need_gpa state: cleared on flush, entered on a gpf hit whose gpa is not yet available,
    // and marked refilled when the matching ptw response comes back
    when (!isitlb && need_gpa_robidx.needFlush(redirect) || isitlb && flush_pipe(i)){
      need_gpa := false.B
      resp_gpa_refill := false.B
      need_gpa_vpn := 0.U
    }.elsewhen (req_out_v(i) && !p_hit && !(resp_gpa_refill && need_gpa_vpn_hit) && !isOnlys2xlate && hasGpf(i) && need_gpa === false.B && !io.requestor(i).req_kill && !isPrefetch && !currentRedirect && !lastCycleRedirect) {
      need_gpa_wire := true.B
      need_gpa := true.B
      need_gpa_vpn := get_pn(req_out(i).vaddr)
      resp_gpa_refill := false.B
      need_gpa_robidx := req_out(i).debug.robIdx
    }.elsewhen (ptw.resp.fire && need_gpa && need_gpa_vpn === ptw.resp.bits.getVpn(need_gpa_vpn)) {
      resp_gpa_gvpn := Mux(ptw.resp.bits.s2xlate === onlyStage2, ptw.resp.bits.s2.entry.tag, ptw.resp.bits.s1.genGVPN(need_gpa_vpn))
      resp_s1_level := ptw.resp.bits.s1.entry.level.get
      resp_s1_isLeaf := ptw.resp.bits.s1.isLeaf()
      resp_s1_isFakePte := ptw.resp.bits.s1.isFakePte()
      resp_gpa_refill := true.B
    }

    when (req_out_v(i) && hasGpf(i) && resp_gpa_refill && need_gpa_vpn_hit){
      need_gpa := false.B
    }

    val hit = e_hit || p_hit
    val miss = (!hit && enable) || hasGpf(i) && !p_hit && !(resp_gpa_refill && need_gpa_vpn_hit) && !isOnlys2xlate && !isPrefetch && !lastCycleRedirect
    hit.suggestName(s"hit_read_${i}")
    miss.suggestName(s"miss_read_${i}")

    val vaddr = SignExt(req_out(i).vaddr, PAddrBits)
    resp(i).bits.miss := miss
    resp(i).bits.ptwBack := ptw.resp.fire
    resp(i).bits.memidx := RegEnable(req_in(i).bits.memidx, req_in(i).valid)
    resp(i).bits.fastMiss := !hit && enable

    val ppn = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ppnLen.W))))
    val pbmt = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ptePbmtLen.W))))
    val perm = WireInit(VecInit(Seq.fill(nRespDups)(0.U.asTypeOf(new TlbPermBundle))))
    val gvpn = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ptePPNLen.W))))
    val level = WireInit(VecInit(Seq.fill(nRespDups)(0.U(log2Up(Level + 1).W))))
    val isLeaf = WireInit(VecInit(Seq.fill(nRespDups)(false.B)))
    val isFakePte = WireInit(VecInit(Seq.fill(nRespDups)(false.B)))
    val g_pbmt = WireInit(VecInit(Seq.fill(nRespDups)(0.U(ptePbmtLen.W))))
    val g_perm = WireInit(VecInit(Seq.fill(nRespDups)(0.U.asTypeOf(new TlbPermBundle))))
    val r_s2xlate = WireInit(VecInit(Seq.fill(nRespDups)(0.U(2.W))))
    for (d <- 0 until nRespDups) {
      // the bypassed ptw response takes priority over the storage / need_gpa registers
      ppn(d) := Mux(p_hit, p_ppn, e_ppn(d))
      pbmt(d) := Mux(p_hit, p_pbmt, e_pbmt(d))
      perm(d) := Mux(p_hit, p_perm, e_perm(d))
      gvpn(d) := Mux(p_hit, p_gvpn, resp_gpa_gvpn)
      level(d) := Mux(p_hit, p_s1_level, resp_s1_level)
      isLeaf(d) := Mux(p_hit, p_s1_isLeaf, resp_s1_isLeaf)
      isFakePte(d) := Mux(p_hit, p_s1_isFakePte, resp_s1_isFakePte)
      g_pbmt(d) := Mux(p_hit, p_g_pbmt, e_g_pbmt(d))
      g_perm(d) := Mux(p_hit, p_g_perm, e_g_perm(d))
      r_s2xlate(d) := Mux(p_hit, p_s2xlate, e_s2xlate(d))
      val paddr = Cat(ppn(d), get_off(req_out(i).vaddr))
      val vpn_idx = Mux1H(Seq(
        (isFakePte(d) && csr.vsatp.mode === Sv39) -> 2.U,
        (isFakePte(d) && csr.vsatp.mode === Sv48) -> 3.U,
        (!isFakePte(d)) -> (level(d) - 1.U),
      ))
      // We use `fullva` here when `isLeaf`, in order to cope with the situation of an unaligned load/store cross page
      // for example, a `ld` instruction on address 0x81000ffb will be split into two loads
      // 1. ld 0x81000ff8. vaddr = 0x81000ff8, fullva = 0x81000ffb
      // 2. ld 0x81001000. vaddr = 0x81001000, fullva = 0x81000ffb
      // When load 1 trigger a guest page fault, we should use offset of fullva when generate gpaddr
      // and when load 2 trigger a guest page fault, we should just use offset of vaddr(all zero).
      // Also, when onlyS2, if crosspage, gpaddr = vaddr(start address of a new page), else gpaddr = fullva(original vaddr)
      // By the way, frontend handles the cross page instruction fetch by itself, so TLB doesn't need to do anything extra.
      // Also, the fullva of iTLB is not used and always zero. crossPageVaddr should never use fullva in iTLB.
      val crossPageVaddr = Mux(isitlb || req_out(i).fullva(12) =/= vaddr(12), vaddr, req_out(i).fullva)
      val gpaddr_offset = Mux(isLeaf(d), get_off(crossPageVaddr), Cat(getVpnn(get_pn(crossPageVaddr), vpn_idx), 0.U(log2Up(XLEN/8).W)))
      val gpaddr = Cat(gvpn(d), gpaddr_offset)
      resp(i).bits.paddr(d) := Mux(enable, paddr, vaddr)
      resp(i).bits.gpaddr(d) := Mux(r_s2xlate(d) === onlyStage2, crossPageVaddr, gpaddr)
    }

    XSDebug(req_out_v(i), p"(${i.U}) hit:${hit} miss:${miss} ppn:${Hexadecimal(ppn(0))} perm:${perm(0)}\n")

    val pmp_paddr = resp(i).bits.paddr(0)

    (hit, miss, pmp_paddr, perm, g_perm, pbmt, g_pbmt)
  }

  /** Extract one vpnnLen-wide slice of `vpn`.
    *
    * @param vpn virtual page number
    * @param idx slice index, 0 being the least significant slice; indices above 3 yield 0
    * @return the selected slice
    */
  def getVpnn(vpn: UInt, idx: UInt): UInt = {
    MuxLookup(idx, 0.U)(Seq(
      0.U -> vpn(vpnnLen - 1, 0),
      1.U -> vpn(vpnnLen * 2 - 1, vpnnLen),
      2.U -> vpn(vpnnLen * 3 - 1, vpnnLen * 2),
      3.U -> vpn(vpnnLen * 4 - 1, vpnnLen * 3))
    )
  }

  /** Drive the pmp request of port `idx`.
    *
    * @param addr address to be checked
    * @param size access size
    * @param cmd access command
    * @param noTranslate when set, the pmp request is valid even if the tlb response is not
    * @param idx requestor port index
    */
  def pmp_check(addr: UInt, size: UInt, cmd: UInt, noTranslate: Bool, idx: Int): Unit = {
    pmp(idx).valid := resp(idx).valid || noTranslate
    pmp(idx).bits.addr := addr
    pmp(idx).bits.size := size
    pmp(idx).bits.cmd := cmd
  }

  /** Drive the pbmt field of response duplicate `d` on port `idx`.
    * For allStage the stage-1 pbmt is used unless it is zero, in which case the stage-2 pbmt is used.
    * The result is forced to zero when translation is not enabled for the port.
    *
    * @param idx requestor port index
    * @param d response duplicate index
    * @param pbmt stage-1 pbmt
    * @param g_pbmt stage-2 pbmt
    * @param s2xlate translation mode of the request
    */
  def pbmt_check(idx: Int, d: Int, pbmt: UInt, g_pbmt: UInt, s2xlate: UInt):Unit = {
    val pbmtRes = pbmt
    val gpbmtRes = g_pbmt
    val res = MuxLookup(s2xlate, 0.U)(Seq(
      onlyStage1 -> pbmtRes,
      onlyStage2 -> gpbmtRes,
      allStage -> Mux(pbmtRes =/= 0.U, pbmtRes, gpbmtRes),
      noS2xlate -> pbmtRes
    ))
    resp(idx).bits.pbmt(d) := Mux(portTranslateEnable(idx), res, 0.U)
  }

  // for timing optimization, pmp check is divided into dynamic and static
  /** Drive the exception fields (pf / gpf / af and related flags) of response duplicate `nDups`
    * on port `idx`.
    *
    * @param perm stage-1 permission bits
    * @param cmd access command
    * @param idx requestor port index
    * @param nDups response duplicate index
    * @param g_perm stage-2 permission bits
    * @param hlvx whether the access requires execute permission instead of read permission
    * @param s2xlate translation mode of the request
    * @param prepf page fault detected in the request cycle by the high-address check
    * @param pregpf guest page fault detected in the request cycle by the high-address check
    * @param preaf access fault detected in the request cycle by the high-address check
    */
  def perm_check(perm: TlbPermBundle, cmd: UInt, idx: Int, nDups: Int, g_perm: TlbPermBundle, hlvx: Bool, s2xlate: UInt, prepf: Bool = false.B, pregpf: Bool = false.B, preaf: Bool = false.B) = {
    // dynamic: superpage (or full-connected reg entries) -> check pmp when translation done
    // static: 4K pages (or sram entries) -> check pmp with pre-checked results
    val onlyS2 = s2xlate === onlyStage2
    val allS2xlate = s2xlate === allStage
    // noS2xlate || onlyS1 -> perm.af
    // onlyS2 -> g_perm.af
    // allS2xlate -> perm.af || g_perm.af
    val af = (!onlyS2 && perm.af) || ((onlyS2 || allS2xlate) && g_perm.af)

    // Stage 1 perm check
    val pf = perm.pf
    val isLd = TlbCmd.isRead(cmd) && !TlbCmd.isAmo(cmd)
    val isSt = TlbCmd.isWrite(cmd) || TlbCmd.isAmo(cmd)
    val isInst = TlbCmd.isExec(cmd)
    val ldUpdate = !perm.a && isLd // update A/D through exception
    val stUpdate = (!perm.a || !perm.d) && isSt // update A/D through exception
    val instrUpdate = !perm.a && isInst // update A/D through exception
    val modeCheck = !(mode(idx) === ModeU && !perm.u || mode(idx) === ModeS && perm.u && (!sum(idx) || ifetch))
    val ldPermFail = !(modeCheck && Mux(hlvx, perm.x, perm.r || mxr(idx) && perm.x))
    val stPermFail = !(modeCheck && perm.w)
    val instrPermFail = !(modeCheck && perm.x)
    val ldPf = (ldPermFail || pf) && isLd
    val stPf = (stPermFail || pf) && isSt
    val instrPf = (instrPermFail || pf) && isInst
    val isFakePte = !perm.v && !perm.pf && !perm.af && !onlyS2
    val isNonLeaf = !(perm.r || perm.w || perm.x) && perm.v && !perm.pf && !perm.af
    val s1_valid = portTranslateEnable(idx) && !onlyS2

    // Stage 2 perm check
    val gpf = g_perm.pf
    val g_ldUpdate = !g_perm.a && isLd
    val g_stUpdate = (!g_perm.a || !g_perm.d) && isSt
    val g_instrUpdate = !g_perm.a && isInst
    val g_ldPermFail = !Mux(hlvx, g_perm.x, (g_perm.r || csr.priv.mxr && g_perm.x))
    val g_stPermFail = !g_perm.w
    val g_instrPermFail = !g_perm.x
    val ldGpf = (g_ldPermFail || gpf) && isLd
    val stGpf = (g_stPermFail || gpf) && isSt
    val instrGpf = (g_instrPermFail || gpf) && isInst
    val s2_valid = portTranslateEnable(idx) && (onlyS2 || allS2xlate)

    val fault_valid = s1_valid || s2_valid

    // pf and gpf can't happen simultaneously
    val hasPf = (ldPf || ldUpdate || stPf || stUpdate || instrPf || instrUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
    // Only lsu need check related to high address truncation
    when (RegNext(prepf || pregpf || preaf)) {
      resp(idx).bits.isForVSnonLeafPTE := false.B
      resp(idx).bits.excp(nDups).pf.ld := RegNext(prepf) && isLd
      resp(idx).bits.excp(nDups).pf.st := RegNext(prepf) && isSt
      resp(idx).bits.excp(nDups).pf.instr := false.B

      resp(idx).bits.excp(nDups).gpf.ld := RegNext(pregpf) && isLd
      resp(idx).bits.excp(nDups).gpf.st := RegNext(pregpf) && isSt
      resp(idx).bits.excp(nDups).gpf.instr := false.B

      resp(idx).bits.excp(nDups).af.ld := RegNext(preaf) && TlbCmd.isRead(cmd)
      resp(idx).bits.excp(nDups).af.st := RegNext(preaf) && TlbCmd.isWrite(cmd)
      resp(idx).bits.excp(nDups).af.instr := false.B

      resp(idx).bits.excp(nDups).vaNeedExt := false.B
      // overwrite miss & gpaddr when exception related to high address truncation happens
      resp(idx).bits.miss := false.B
      resp(idx).bits.gpaddr(nDups) := req_out(idx).fullva
    } .otherwise {
      // isForVSnonLeafPTE is used only when gpf happens and it caused by a G-stage translation which supports VS-stage translation
      // it will be sent to CSR in order to modify the m/htinst.
      // Ref: The RISC-V Instruction Set Manual: Volume II: Privileged Architecture - 19.6.3. Transformed Instruction or Pseudoinstruction for mtinst or htinst
      val isForVSnonLeafPTE = isNonLeaf || isFakePte
      resp(idx).bits.isForVSnonLeafPTE := isForVSnonLeafPTE
      resp(idx).bits.excp(nDups).pf.ld := (ldPf || ldUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
      resp(idx).bits.excp(nDups).pf.st := (stPf || stUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
      resp(idx).bits.excp(nDups).pf.instr := (instrPf || instrUpdate) && s1_valid && !af && !isFakePte && !isNonLeaf
      // NOTE: pf need && with !af, page fault has higher priority than access fault
      // but ptw may also have access fault, then af happens, the translation is wrong.
      // In this case, pf has lower priority than af

      resp(idx).bits.excp(nDups).gpf.ld := (ldGpf || g_ldUpdate) && s2_valid && !af && !hasPf
      resp(idx).bits.excp(nDups).gpf.st := (stGpf || g_stUpdate) && s2_valid && !af && !hasPf
      resp(idx).bits.excp(nDups).gpf.instr := (instrGpf || g_instrUpdate) && s2_valid && !af && !hasPf

      resp(idx).bits.excp(nDups).af.ld := af && TlbCmd.isRead(cmd) && fault_valid
      resp(idx).bits.excp(nDups).af.st := af && TlbCmd.isWrite(cmd) && fault_valid
      resp(idx).bits.excp(nDups).af.instr := af && TlbCmd.isExec(cmd) && fault_valid

      resp(idx).bits.excp(nDups).vaNeedExt := true.B
    }

    resp(idx).bits.excp(nDups).isHyper := isHyperInst(idx)
  }

  /** Connect a non-blocking port: the response is valid one cycle after the request regardless of
    * hit or miss; on a miss either a ptw request is sent or a replay is requested via io.tlbreplay.
    *
    * @param idx requestor port index
    */
  def handle_nonblock(idx: Int): Unit = {
    io.requestor(idx).resp.valid := req_out_v(idx)
    io.requestor(idx).req.ready := io.requestor(idx).resp.ready // should always be true
    XSError(!io.requestor(idx).resp.ready, s"${q.name} port ${idx} is non-block, resp.ready must be true.B")

    val req_need_gpa = hasGpf(idx)
    val req_s2xlate = Wire(UInt(2.W))
    req_s2xlate := genS2xlateMode(virt_out(idx) || req_out(idx).hyperinst)

    val ptw_just_back = ptw.resp.fire && req_s2xlate === ptw.resp.bits.s2xlate && ptw.resp.bits.hit(get_pn(req_out(idx).vaddr), csr.satp.asid, csr.vsatp.asid, csr.hgatp.vmid, true, false)
    // TODO: RegNext enable: ptw.resp.valid ? req.valid
    val ptw_resp_bits_reg = RegEnable(ptw.resp.bits, ptw.resp.valid)
    val ptw_already_back = GatedValidRegNext(ptw.resp.fire) && req_s2xlate === ptw_resp_bits_reg.s2xlate && ptw_resp_bits_reg.hit(get_pn(req_out(idx).vaddr), csr.satp.asid, csr.vsatp.asid, csr.hgatp.vmid, allType = true)
    val ptw_getGpa = req_need_gpa && hitVec(idx)
    val need_gpa_vpn_hit = need_gpa_vpn === get_pn(req_out(idx).vaddr)

    io.ptw.req(idx).valid := false.B
    io.tlbreplay(idx) := false.B

    when (req_out_v(idx) && missVec(idx)) {
      // NOTE: for an miss tlb request: either send a ptw request, or ask for a replay
      when (ptw_just_back || ptw_already_back) {
        io.tlbreplay(idx) := true.B
      } .elsewhen (need_gpa && !need_gpa_vpn_hit && !resp_gpa_refill) {
        // not send any unrelated ptw request when l1tlb is in need_gpa state
        io.tlbreplay(idx) := true.B
      } .otherwise {
        io.ptw.req(idx).valid := true.B
      }
    }

    // a request killed in the cycle after it fired never goes to ptw; it is replayed instead
    when (io.requestor(idx).req_kill && GatedValidRegNext(io.requestor(idx).req.fire)) {
      io.ptw.req(idx).valid := false.B
      io.tlbreplay(idx) := true.B
    }

    io.ptw.req(idx).bits.vpn := get_pn(req_out(idx).vaddr)
    io.ptw.req(idx).bits.s2xlate := req_s2xlate
    io.ptw.req(idx).bits.getGpa := ptw_getGpa
    io.ptw.req(idx).bits.memidx := req_out(idx).memidx
  }

  /** Connect a blocking port: a missing request is held, sent to ptw, and answered directly from
    * the ptw response when it hits the held request.
    *
    * @param idx requestor port index
    */
  def handle_block(idx: Int): Unit = {
    // three valid: 1.if exist a entry; 2.if sent to ptw; 3.unset resp.valid
    io.requestor(idx).req.ready := !req_out_v(idx) || io.requestor(idx).resp.fire
    // req_out_v for if there is a request, may long latency, fixme

    // miss request entries
    val req_need_gpa = hasGpf(idx)
    val miss_req_vpn = get_pn(req_out(idx).vaddr)
    val miss_req_memidx = req_out(idx).memidx
    val miss_req_s2xlate = Wire(UInt(2.W))
    miss_req_s2xlate := genS2xlateMode(virt_out(idx) || req_out(idx).hyperinst)
    val miss_req_s2xlate_reg = RegEnable(miss_req_s2xlate, io.ptw.req(idx).fire)
    val hasS2xlate = miss_req_s2xlate_reg =/= noS2xlate
    val onlyS2 = miss_req_s2xlate_reg === onlyStage2
    val hit_s1 = io.ptw.resp.bits.s1.hit(miss_req_vpn, Mux(hasS2xlate, csr.vsatp.asid, csr.satp.asid), csr.hgatp.vmid, allType = true, false, hasS2xlate)
    val hit_s2 = io.ptw.resp.bits.s2.hit(miss_req_vpn, csr.hgatp.vmid)
    val hit = Mux(onlyS2, hit_s2, hit_s1) && io.ptw.resp.valid && miss_req_s2xlate_reg === io.ptw.resp.bits.s2xlate

    val new_coming_valid = WireInit(false.B)
    new_coming_valid := req_in(idx).fire && !req_in(idx).bits.kill && !flush_pipe(idx)
    val new_coming = GatedValidRegNext(new_coming_valid)
    val miss_wire = new_coming && missVec(idx)
    val miss_v = ValidHoldBypass(miss_wire, resp(idx).fire, flush_pipe(idx))
    val miss_req_v = ValidHoldBypass(miss_wire || (miss_v && flush_mmu && !mmu_flush_pipe),
      io.ptw.req(idx).fire || resp(idx).fire, flush_pipe(idx))

    // when ptw resp, check if hit, reset miss_v, resp to lsu/ifu
    resp(idx).valid := req_out_v(idx) && !(miss_v && portTranslateEnable(idx))
    when (io.ptw.resp.fire && hit && req_out_v(idx) && portTranslateEnable(idx)) {
      val stage1 = io.ptw.resp.bits.s1
      val stage2 = io.ptw.resp.bits.s2
      val s2xlate = io.ptw.resp.bits.s2xlate
      resp(idx).valid := true.B
      resp(idx).bits.miss := false.B
      val s1_paddr = Cat(stage1.genPPN(get_pn(req_out(idx).vaddr)), get_off(req_out(idx).vaddr))
      val s2_paddr = Cat(stage2.genPPNS2(get_pn(req_out(idx).vaddr)), get_off(req_out(idx).vaddr))
      for (d <- 0 until nRespDups) {
        resp(idx).bits.paddr(d) := Mux(s2xlate =/= noS2xlate, s2_paddr, s1_paddr)
        resp(idx).bits.gpaddr(d) := s1_paddr
        pbmt_check(idx, d, io.ptw.resp.bits.s1.entry.pbmt, io.ptw.resp.bits.s2.entry.pbmt, s2xlate)
        perm_check(stage1, req_out(idx).cmd, idx, d, stage2, req_out(idx).hlvx, s2xlate)
      }
      pmp_check(resp(idx).bits.paddr(0), req_out(idx).size, req_out(idx).cmd, false.B, idx)

      // NOTE: the unfiltered req would be handled by Repeater
    }
    assert(RegNext(!resp(idx).valid || resp(idx).ready, true.B), "when tlb resp valid, ready should be true, must")
    assert(RegNext(req_out_v(idx) || !(miss_v || miss_req_v), true.B), "when not req_out_v, should not set miss_v/miss_req_v")

    val ptw_req = io.ptw.req(idx)
    ptw_req.valid := miss_req_v
    ptw_req.bits.vpn := miss_req_vpn
    ptw_req.bits.s2xlate := miss_req_s2xlate
    ptw_req.bits.getGpa := req_need_gpa && hitVec(idx)
    ptw_req.bits.memidx := miss_req_memidx

    io.tlbreplay(idx) := false.B

    // NOTE: when flush pipe, tlb should abandon last req
    // however, some outside modules like icache, dont care flushPipe, and still waiting for tlb resp
    // just resp valid and raise page fault to go through. The pipe(ifu) will abandon it.
    if (!q.outsideRecvFlush) {
      when (req_out_v(idx) && flush_pipe(idx) && portTranslateEnable(idx)) {
        resp(idx).valid := true.B
        for (d <- 0 until nRespDups) {
          resp(idx).bits.pbmt(d) := 0.U
          resp(idx).bits.excp(d).pf.ld := true.B // sfence happened, pf for not to use this addr
          resp(idx).bits.excp(d).pf.st := true.B
          resp(idx).bits.excp(d).pf.instr := true.B
        }
      }
    }
  }

  // when ptw resp, tlb at refill_idx maybe set to miss by force.
  // Bypass ptw resp to check.
  /** Register the ptw response so that a request arriving in the same cycle can use it one cycle later.
    *
    * @param vpn virtual page number of the incoming request
    * @param s2xlate translation mode of the incoming request
    * @return (p_hit, p_ppn, p_pbmt, p_perm, p_gvpn, p_g_pbmt, p_g_perm, p_s2xlate, p_s1_level,
    *         p_s1_isLeaf, p_s1_isFakePte); p_hit is set the cycle after a fired ptw response matched
    *         `vpn` and `s2xlate`, the other fields are captured whenever a ptw response fires
    */
  def ptw_resp_bypass(vpn: UInt, s2xlate: UInt) = {
    // TODO: RegNext enable: ptw.resp.valid
    val onlyS2 = s2xlate === onlyStage2
    val s2xlate_hit = s2xlate === ptw.resp.bits.s2xlate
    val resp_hit = ptw.resp.bits.hit(vpn, csr.satp.asid, csr.vsatp.asid, csr.hgatp.vmid, true, false)
    val p_hit = GatedValidRegNext(resp_hit && io.ptw.resp.fire && s2xlate_hit)
    val ppn_s1 = ptw.resp.bits.s1.genPPN(vpn)(ppnLen - 1, 0)
    val gvpn = Mux(onlyS2, vpn, ppn_s1)
    val ppn_s2 = ptw.resp.bits.s2.genPPNS2(gvpn)(ppnLen - 1, 0)
    val p_ppn = RegEnable(Mux(s2xlate === onlyStage2 || s2xlate === allStage, ppn_s2, ppn_s1), io.ptw.resp.fire)
    val p_pbmt = RegEnable(ptw.resp.bits.s1.entry.pbmt,io.ptw.resp.fire)
    val p_perm = RegEnable(ptwresp_to_tlbperm(ptw.resp.bits.s1), io.ptw.resp.fire)
    val p_gvpn = RegEnable(Mux(onlyS2, ptw.resp.bits.s2.entry.tag, ptw.resp.bits.s1.genGVPN(vpn)), io.ptw.resp.fire)
    val p_g_pbmt = RegEnable(ptw.resp.bits.s2.entry.pbmt,io.ptw.resp.fire)
    val p_g_perm = RegEnable(hptwresp_to_tlbperm(ptw.resp.bits.s2), io.ptw.resp.fire)
    val p_s2xlate = RegEnable(ptw.resp.bits.s2xlate, io.ptw.resp.fire)
    val p_s1_level = RegEnable(ptw.resp.bits.s1.entry.level.get, io.ptw.resp.fire)
    val p_s1_isLeaf = RegEnable(ptw.resp.bits.s1.isLeaf(), io.ptw.resp.fire)
    val p_s1_isFakePte = RegEnable(ptw.resp.bits.s1.isFakePte(), io.ptw.resp.fire)
    (p_hit, p_ppn, p_pbmt, p_perm, p_gvpn, p_g_pbmt, p_g_perm, p_s2xlate, p_s1_level, p_s1_isLeaf, p_s1_isFakePte)
  }

  // perf event
  val result_ok = req_in.map(a => GatedValidRegNext(a.fire))
  val perfEvents =
    Seq(
      ("access", PopCount((0 until Width).map{i => if (Block(i)) io.requestor(i).req.fire else portTranslateEnable(i) && result_ok(i) })),
      ("miss ", PopCount((0 until Width).map{i => if (Block(i)) portTranslateEnable(i) && result_ok(i) && missVec(i) else ptw.req(i).fire })),
    )
  generatePerfEvent()

  // perf log
  for (i <- 0 until Width) {
    if (Block(i)) {
      XSPerfAccumulate(s"access${i}",result_ok(i) && portTranslateEnable(i))
      XSPerfAccumulate(s"miss${i}", result_ok(i) && missVec(i))
    } else {
      XSPerfAccumulate("first_access" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i) && RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid))
      XSPerfAccumulate("access" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i))
      XSPerfAccumulate("first_miss" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i) && missVec(i) && RegEnable(req(i).bits.debug.isFirstIssue, req(i).valid))
      XSPerfAccumulate("miss" + Integer.toString(i, 10), result_ok(i) && portTranslateEnable(i) && missVec(i))
    }
  }
  XSPerfAccumulate("ptw_resp_count", ptw.resp.fire)
  XSPerfAccumulate("ptw_resp_pf_count", ptw.resp.fire && ptw.resp.bits.s1.pf)

  // Log
  for(i <- 0 until Width) {
    XSDebug(req(i).valid, p"req(${i.U}): (${req(i).valid} ${req(i).ready}) ${req(i).bits}\n")
    XSDebug(resp(i).valid, p"resp(${i.U}): (${resp(i).valid} ${resp(i).ready}) ${resp(i).bits}\n")
  }

  XSDebug(io.sfence.valid, p"Sfence: ${io.sfence}\n")
  XSDebug(ParallelOR(req_out_v) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)}\n")
  for (i <- ptw.req.indices) {
    XSDebug(ptw.req(i).fire, p"L2TLB req:${ptw.req(i).bits}\n")
  }
  XSDebug(ptw.resp.valid, p"L2TLB resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n")

  // Elaboration-time banner. Do not call `.get` on the replacer: it may be absent, and the banner
  // must not abort elaboration in that case.
  val replacerName = q.Replacer.getOrElse("none")
  println(s"${q.name}: page: ${q.NWays} ${q.Associative} ${replacerName}")

  if (env.EnableDifftest) {
    for (i <- 0 until Width) {
      val pf = io.requestor(i).resp.bits.excp(0).pf.instr || io.requestor(i).resp.bits.excp(0).pf.st || io.requestor(i).resp.bits.excp(0).pf.ld
      val gpf = io.requestor(i).resp.bits.excp(0).gpf.instr || io.requestor(i).resp.bits.excp(0).gpf.st || io.requestor(i).resp.bits.excp(0).gpf.ld
      val af = io.requestor(i).resp.bits.excp(0).af.instr || io.requestor(i).resp.bits.excp(0).af.st || io.requestor(i).resp.bits.excp(0).af.ld
      val difftest = DifftestModule(new DiffL1TLBEvent)
      difftest.coreid := io.hartId
      // only successful, fault-free translations are reported
      difftest.valid := RegNext(io.requestor(i).req.fire) && !io.requestor(i).req_kill && io.requestor(i).resp.fire && !io.requestor(i).resp.bits.miss && !pf && !af && !gpf && portTranslateEnable(i)
      if (!Seq("itlb", "ldtlb", "sttlb").contains(q.name)) {
        difftest.valid := false.B
      }
      difftest.index := TLBDiffId(p(XSCoreParamsKey).HartId).U
      difftest.vpn := RegEnable(get_pn(req_in(i).bits.vaddr), req_in(i).valid)
      difftest.ppn := get_pn(io.requestor(i).resp.bits.paddr(0))
      difftest.satp := Cat(csr.satp.mode, csr.satp.asid, csr.satp.ppn)
      difftest.vsatp := Cat(csr.vsatp.mode, csr.vsatp.asid, csr.vsatp.ppn)
      difftest.hgatp := Cat(csr.hgatp.mode, csr.hgatp.vmid, csr.hgatp.ppn)
      val req_s2xlate = Wire(UInt(2.W))
      req_s2xlate := genS2xlateMode(RegNext(virt_in || req_in(i).bits.hyperinst))
      difftest.s2xlate := req_s2xlate
    }
  }
}

/** Elaboration-time counter handing out consecutive difftest indices.
  * The counter restarts from 0 whenever `apply` is called with a hartId different from the
  * previous call's.
  */
object TLBDiffId {
  var i: Int = 0
  var lastHartId: Int = -1
  def apply(hartId: Int): Int = {
    if (lastHartId != hartId) {
      i = 0
      lastHartId = hartId
    }
    i += 1
    i - 1
  }
}

/** TLB whose ports are all non-blocking. */
class TLBNonBlock(Width: Int, nRespDups: Int = 1, q: TLBParameters)(implicit p: Parameters) extends TLB(Width, nRespDups, Seq.fill(Width)(false), q)
/** TLB whose ports are all blocking. */
class TLBBLock(Width: Int, nRespDups: Int = 1, q: TLBParameters)(implicit p: Parameters) extends TLB(Width, nRespDups, Seq.fill(Width)(true), q)

/** Replacement logic placed outside the TLB storage: records accesses and provides the refill index. */
class TlbReplace(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModule {
  val io = IO(new TlbReplaceIO(Width, q))

  if (q.Associative == "fa") {
    val re = ReplacementPolicy.fromString(q.Replacer, q.NWays)
    re.access(io.page.access.map(_.touch_ways))
    io.page.refillIdx := re.way
  } else { // set-associative && plru
    val re = ReplacementPolicy.fromString(q.Replacer, q.NSets, q.NWays)
    re.access(io.page.access.map(_.sets), io.page.access.map(_.touch_ways))
    // with a single way there is nothing to choose, so the refill index is constant 0
    io.page.refillIdx := { if (q.NWays == 1) 0.U else re.way(io.page.chosen_set) }
  }
}